{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\software\\anaconda\\lib\\site-packages\\scipy\\__init__.py:173: UserWarning: A NumPy version >=1.19.5 and <1.27.0 is required for this version of SciPy (detected version 1.19.2)\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n",
      "D:\\software\\anaconda\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
      "D:\\software\\anaconda\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
      "D:\\software\\anaconda\\lib\\site-packages\\dask\\dataframe\\utils.py:367: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 80 from PyObject",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mValueError\u001B[0m                                Traceback (most recent call last)",
      "\u001B[1;32m<ipython-input-1-1b33e02af9b8>\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0msklearn\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mlinear_model\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mSGDRegressor\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mLinearRegression\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mRidge\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      6\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0msklearn\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpreprocessing\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mMinMaxScaler\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 7\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mgensim\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmodels\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mWord2Vec\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      8\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mmath\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      9\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mnumpy\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mnp\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\__init__.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      9\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mlogging\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     10\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 11\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mgensim\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mparsing\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mcorpora\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mmatutils\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0minterfaces\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mmodels\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0msimilarities\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mutils\u001B[0m  \u001B[1;31m# noqa:F401\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m     12\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     13\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\corpora\\__init__.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[1;31m# bring corpus classes directly into package namespace, to save some typing\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 6\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[1;33m.\u001B[0m\u001B[0mindexedcorpus\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mIndexedCorpus\u001B[0m  \u001B[1;31m# noqa:F401 must appear before the other classes\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      7\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      8\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[1;33m.\u001B[0m\u001B[0mmmcorpus\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mMmCorpus\u001B[0m  \u001B[1;31m# noqa:F401\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\corpora\\indexedcorpus.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m     12\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mnumpy\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     13\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 14\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mgensim\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0minterfaces\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mutils\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m     15\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     16\u001B[0m \u001B[0mlogger\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mlogging\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mgetLogger\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0m__name__\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\interfaces.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m     17\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mlogging\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     18\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 19\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mgensim\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mutils\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mmatutils\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m     20\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m     21\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\matutils.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m   1028\u001B[0m \u001B[1;32mtry\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m   1029\u001B[0m     \u001B[1;31m# try to load fast, cythonized code if possible\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 1030\u001B[1;33m     \u001B[1;32mfrom\u001B[0m \u001B[0mgensim\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_matutils\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mlogsumexp\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mmean_absolute_difference\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mdirichlet_expectation\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m   1031\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m   1032\u001B[0m \u001B[1;32mexcept\u001B[0m \u001B[0mImportError\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;32mD:\\software\\anaconda\\lib\\site-packages\\gensim\\_matutils.pyx\u001B[0m in \u001B[0;36minit gensim._matutils\u001B[1;34m()\u001B[0m\n",
      "\u001B[1;31mValueError\u001B[0m: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 80 from PyObject"
     ]
    }
   ],
   "source": [
    "import gc\n",
    "import math\n",
    "import os\n",
    "import time\n",
    "import warnings\n",
    "\n",
    "import lightgbm as lgb\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from gensim.models import Word2Vec\n",
    "from sklearn.decomposition import SparsePCA, TruncatedSVD\n",
    "from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer\n",
    "from sklearn.linear_model import LinearRegression, Ridge, SGDRegressor\n",
    "from sklearn.metrics import accuracy_score, f1_score, log_loss, roc_auc_score\n",
    "from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from tqdm import tqdm\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 数据读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Three disease feature tables from the training folder.\n",
    "disease_feature1, disease_feature2, disease_feature3 = (\n",
    "    pd.read_csv(csv_path)\n",
    "    for csv_path in (\n",
    "        \"data/训练集/disease_feature1.csv\",\n",
    "        \"data/训练集/disease_feature2.csv\",\n",
    "        \"data/训练集/disease_feature3.csv\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Training labels and the training food feature table.\n",
    "train_answer = pd.read_csv(\"data/训练集/train_answer.csv\")\n",
    "train_food = pd.read_csv(\"data/训练集/train_food.csv\")\n",
    "\n",
    "# Preliminary round (A board) test set: food features and the submission template.\n",
    "preliminary_a_food = pd.read_csv(\"data/初赛A榜测试集/preliminary_a_food.csv\")\n",
    "preliminary_a_submit_sample = pd.read_csv(\"data/初赛A榜测试集/preliminary_a_submit_sample.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Remove the template's `related_prob` column before the template is stacked with\n",
    "# train_answer. Rebinding through drop(errors='ignore') instead of an in-place `del`\n",
    "# keeps this cell safe to re-run (a second `del` would raise KeyError).\n",
    "preliminary_a_submit_sample = preliminary_a_submit_sample.drop(columns=['related_prob'], errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food_id</th>\n",
       "      <th>disease_id</th>\n",
       "      <th>related</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_998</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_861</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_559</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_841</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_81</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  food_id   disease_id  related\n",
       "0  food_0  disease_998      0.0\n",
       "1  food_0  disease_861      0.0\n",
       "2  food_0  disease_559      0.0\n",
       "3  food_0  disease_841      0.0\n",
       "4  food_0   disease_81      0.0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the labelled pairs on top of the test pairs and renumber the rows 0..n-1.\n",
    "data = pd.concat(\n",
    "    [train_answer, preliminary_a_submit_sample],\n",
    "    axis=0,\n",
    "    ignore_index=True,\n",
    ")\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "这里直接取 `food_id`、`disease_id` 下划线后的数字作为整数编码，当然也可以使用 `LabelEncoder` 的方式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Turn ids such as 'food_12' / 'disease_998' into the integer after the underscore.\n",
    "for id_col, code_col in (('food_id', 'food'), ('disease_id', 'disease')):\n",
    "    data[code_col] = data[id_col].map(lambda raw_id: int(raw_id.split('_')[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food_id</th>\n",
       "      <th>N_0</th>\n",
       "      <th>N_1</th>\n",
       "      <th>N_2</th>\n",
       "      <th>N_3</th>\n",
       "      <th>N_4</th>\n",
       "      <th>N_5</th>\n",
       "      <th>N_6</th>\n",
       "      <th>N_7</th>\n",
       "      <th>N_8</th>\n",
       "      <th>...</th>\n",
       "      <th>N_202</th>\n",
       "      <th>N_203</th>\n",
       "      <th>N_204</th>\n",
       "      <th>N_205</th>\n",
       "      <th>N_206</th>\n",
       "      <th>N_207</th>\n",
       "      <th>N_208</th>\n",
       "      <th>N_209</th>\n",
       "      <th>N_210</th>\n",
       "      <th>N_211</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>food_0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.5</td>\n",
       "      <td>92.82</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>food_1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.90</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.41</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>food_4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.5</td>\n",
       "      <td>15.46</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>food_5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.068</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.045</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.314</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.89</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.3</td>\n",
       "      <td>86.35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>food_6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.115</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.091</td>\n",
       "      <td>0.58</td>\n",
       "      <td>0.508</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.6</td>\n",
       "      <td>93.22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.54</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 213 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  food_id  N_0  N_1  N_2    N_3  N_4    N_5   N_6    N_7  N_8  ...  N_202  \\\n",
       "0  food_0  NaN  NaN  NaN    NaN  0.0    NaN   NaN    NaN  NaN  ...    NaN   \n",
       "1  food_1  NaN  NaN  NaN    NaN  0.0    NaN   NaN    NaN  NaN  ...    NaN   \n",
       "2  food_4  NaN  NaN  NaN    NaN  0.0    NaN   NaN    NaN  NaN  ...    NaN   \n",
       "3  food_5  NaN  NaN  NaN  0.068  0.0  0.045  0.75  0.314  NaN  ...    NaN   \n",
       "4  food_6  NaN  NaN  NaN  0.115  0.0  0.091  0.58  0.508  NaN  ...    NaN   \n",
       "\n",
       "   N_203  N_204  N_205  N_206  N_207  N_208  N_209  N_210  N_211  \n",
       "0    NaN   0.02    0.0    NaN    NaN   30.5  92.82    NaN   0.92  \n",
       "1    NaN  23.90    0.0    NaN    NaN    0.0   2.41    NaN   3.31  \n",
       "2    NaN   0.12    0.0    NaN    NaN    3.5  15.46    NaN   0.36  \n",
       "3    NaN   0.89    0.0    NaN    NaN    3.3  86.35    NaN   0.20  \n",
       "4    NaN   1.13    0.0    0.0    NaN   41.6  93.22    NaN   0.54  \n",
       "\n",
       "[5 rows x 213 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the training and test food feature tables and renumber the rows 0..n-1.\n",
    "food = pd.concat(\n",
    "    [train_food, preliminary_a_food],\n",
    "    axis=0,\n",
    "    ignore_index=True,\n",
    ")\n",
    "food.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 目标编码\n",
    "\n",
    "由于本题只有两个离散变量 `food_id` 和 `disease_id`，而测试集中都是新的 `food_id`（其目标均值无法从训练集统计得到），因此这里只对 `disease_id` 做目标编码：训练集按 5 折交叉统计各疾病的 `related` 均值，测试集则使用全部训练数据统计。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 17.24it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 17.85it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.16it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 17.86it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 17.54it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.38it/s]\n"
     ]
    }
   ],
   "source": [
    "cat_list = ['disease']  # columns handed to statis_feat for mean-of-`related` features\n",
    "\n",
    "\n",
    "def stat(df, df_merge, group_by, agg):\n",
    "    \"\"\"Aggregate `df` per `group_by` key and left-join the result onto `df_merge`.\n",
    "\n",
    "    Args:\n",
    "        df: Frame the statistics are computed from.\n",
    "        df_merge: Frame that receives the aggregated columns.\n",
    "        group_by: List of column names to group on; also used as merge keys.\n",
    "        agg: Dict mapping a column name to a list of aggregation names,\n",
    "            e.g. {'related': ['mean']}.\n",
    "\n",
    "    Returns:\n",
    "        A new frame: `df_merge` plus one column per (column, aggregation)\n",
    "        pair, named '{group_by joined by _}_{column}_{aggregation}'.\n",
    "    \"\"\"\n",
    "    columns = [\n",
    "        '{}_{}_{}'.format('_'.join(group_by), on, method)\n",
    "        for on, methods in agg.items()\n",
    "        for method in methods\n",
    "    ]\n",
    "\n",
    "    group = df.groupby(group_by).agg(agg)\n",
    "    group.columns = columns  # flatten the (column, aggregation) header\n",
    "    group = group.reset_index()\n",
    "\n",
    "    # If df_merge already carries these output columns (e.g. the cell is re-run on an\n",
    "    # already-encoded frame), drop them first; otherwise merge would produce\n",
    "    # duplicated *_x / *_y columns instead of refreshing the feature.\n",
    "    stale = [name for name in columns if name in df_merge.columns]\n",
    "    if stale:\n",
    "        df_merge = df_merge.drop(columns=stale)\n",
    "\n",
    "    return df_merge.merge(group, on=group_by, how='left')\n",
    "\n",
    "\n",
    "def statis_feat(df_know, df_unknow, cat_list):\n",
    "    \"\"\"Add the per-category mean of `related` to `df_unknow`.\n",
    "\n",
    "    For each column in `cat_list`, the mean is computed on `df_know` and joined\n",
    "    onto `df_unknow` through `stat`. Returns the enriched frame.\n",
    "    \"\"\"\n",
    "    encoded = df_unknow\n",
    "    for cat_col in tqdm(cat_list):\n",
    "        encoded = stat(df_know, encoded, [cat_col], {'related': ['mean']})\n",
    "    return encoded\n",
    "\n",
    "\n",
    "# Rows with a `related` value are the known set; rows without one are to be predicted.\n",
    "df_train = data[~data['related'].isnull()].reset_index(drop=True)\n",
    "df_test = data[data['related'].isnull()]\n",
    "n_rows_expected = len(df_train) + len(df_test)\n",
    "\n",
    "# Out-of-fold encoding: each training row receives statistics computed on the other\n",
    "# folds only, so its own label never contributes to its own feature.\n",
    "kf = StratifiedKFold(n_splits=5, random_state=2020, shuffle=True)\n",
    "encoded_parts = []\n",
    "for train_index, val_index in kf.split(df_train, df_train['related']):\n",
    "    encoded_parts.append(\n",
    "        statis_feat(df_train.iloc[train_index], df_train.iloc[val_index], cat_list)\n",
    "    )\n",
    "\n",
    "# Test rows are encoded with statistics from the full training set.\n",
    "encoded_parts.append(statis_feat(df_train, df_test, cat_list))\n",
    "\n",
    "# Concatenate once at the end instead of growing a frame inside the loop.\n",
    "data = pd.concat(encoded_parts, axis=0).reset_index(drop=True)\n",
    "assert len(data) == n_rows_expected, 'target encoding must not add or drop rows'\n",
    "\n",
    "del df_train, df_test, encoded_parts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 疾病特征处理\n",
    "\n",
    "这里我们使用TruncatedSVD的方法，对疾病特征进行降维，维度均为128。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Names of the disease_feature1 columns that contain the letter 'F'.\n",
    "f_col = list(filter(lambda name: 'F' in name, disease_feature1.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Reduce disease_feature1 to 128 TruncatedSVD components.\n",
    "# Missing values are replaced by 0 first; fillna returns a new frame, so the raw table is untouched.\n",
    "disease_feature_1_ = disease_feature1.fillna(0)\n",
    "decom = TruncatedSVD(n_components=128, n_iter=20, random_state=2023)\n",
    "\n",
    "# The first column is skipped; presumably it is `disease_id` -- TODO confirm the CSV column order.\n",
    "decom_x = decom.fit_transform(disease_feature_1_.iloc[:, 1:])\n",
    "decom_feas = pd.DataFrame(\n",
    "    decom_x,\n",
    "    columns=['disease1_svd_' + str(i) for i in range(decom_x.shape[1])],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Keep only the id column and append all SVD components in a single concat\n",
    "# (avoids 128 separate column inserts on a sliced frame). Rows are aligned on the\n",
    "# index, which matches the per-column assignment as long as disease_feature1 still\n",
    "# has its default index from read_csv.\n",
    "disease_feature1 = pd.concat([disease_feature1[['disease_id']], decom_feas], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Names of the disease_feature2 columns that contain the letter 'F'.\n",
    "f_col = list(filter(lambda name: 'F' in name, disease_feature2.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Reduce disease_feature2 to 128 TruncatedSVD components.\n",
    "# Missing values are replaced by 0 first; fillna returns a new frame, so the raw table is untouched.\n",
    "disease_feature_2_ = disease_feature2.fillna(0)\n",
    "decom = TruncatedSVD(n_components=128, n_iter=20, random_state=2023)\n",
    "\n",
    "# The first column is skipped; presumably it is `disease_id` -- TODO confirm the CSV column order.\n",
    "decom_x = decom.fit_transform(disease_feature_2_.iloc[:, 1:])\n",
    "decom_feas = pd.DataFrame(\n",
    "    decom_x,\n",
    "    columns=['disease2_svd_' + str(i) for i in range(decom_x.shape[1])],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Keep only the id column and append all SVD components in a single concat\n",
    "# (avoids 128 separate column inserts on a sliced frame). Rows are aligned on the\n",
    "# index, which matches the per-column assignment as long as disease_feature2 still\n",
    "# has its default index from read_csv.\n",
    "disease_feature2 = pd.concat([disease_feature2[['disease_id']], decom_feas], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Names of the disease_feature3 columns that contain the letter 'F'.\n",
    "f_col = list(filter(lambda name: 'F' in name, disease_feature3.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Reduce disease_feature3 to 128 TruncatedSVD components.\n",
    "# Missing values are replaced by 0 first; fillna returns a new frame, so the raw table is untouched.\n",
    "disease_feature_3_ = disease_feature3.fillna(0)\n",
    "decom = TruncatedSVD(n_components=128, n_iter=20, random_state=2023)\n",
    "\n",
    "# The first column is skipped; presumably it is `disease_id` -- TODO confirm the CSV column order.\n",
    "decom_x = decom.fit_transform(disease_feature_3_.iloc[:, 1:])\n",
    "decom_feas = pd.DataFrame(\n",
    "    decom_x,\n",
    "    columns=['disease3_svd_' + str(i) for i in range(decom_x.shape[1])],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Keep only the id column and append all SVD components in a single concat\n",
    "# (avoids 128 separate column inserts on a sliced frame). Rows are aligned on the\n",
    "# index, which matches the per-column assignment as long as disease_feature3 still\n",
    "# has its default index from read_csv.\n",
    "disease_feature3 = pd.concat([disease_feature3[['disease_id']], decom_feas], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food_id</th>\n",
       "      <th>disease_id</th>\n",
       "      <th>related</th>\n",
       "      <th>food</th>\n",
       "      <th>disease</th>\n",
       "      <th>disease_related_mean</th>\n",
       "      <th>N_0</th>\n",
       "      <th>N_1</th>\n",
       "      <th>N_2</th>\n",
       "      <th>N_3</th>\n",
       "      <th>...</th>\n",
       "      <th>disease3_svd_118</th>\n",
       "      <th>disease3_svd_119</th>\n",
       "      <th>disease3_svd_120</th>\n",
       "      <th>disease3_svd_121</th>\n",
       "      <th>disease3_svd_122</th>\n",
       "      <th>disease3_svd_123</th>\n",
       "      <th>disease3_svd_124</th>\n",
       "      <th>disease3_svd_125</th>\n",
       "      <th>disease3_svd_126</th>\n",
       "      <th>disease3_svd_127</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_861</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>861</td>\n",
       "      <td>0.003521</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003554</td>\n",
       "      <td>-0.050520</td>\n",
       "      <td>-0.049823</td>\n",
       "      <td>-0.017363</td>\n",
       "      <td>-0.116130</td>\n",
       "      <td>0.085801</td>\n",
       "      <td>0.072854</td>\n",
       "      <td>0.120381</td>\n",
       "      <td>0.033087</td>\n",
       "      <td>-0.025249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_839</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>839</td>\n",
       "      <td>0.007299</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.004057</td>\n",
       "      <td>0.011732</td>\n",
       "      <td>-0.012960</td>\n",
       "      <td>-0.005264</td>\n",
       "      <td>0.024481</td>\n",
       "      <td>-0.021813</td>\n",
       "      <td>-0.020103</td>\n",
       "      <td>0.079306</td>\n",
       "      <td>0.056902</td>\n",
       "      <td>-0.012589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "      <td>0.018382</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.082889</td>\n",
       "      <td>-0.008238</td>\n",
       "      <td>-0.058866</td>\n",
       "      <td>0.072690</td>\n",
       "      <td>-0.040983</td>\n",
       "      <td>0.001735</td>\n",
       "      <td>-0.031011</td>\n",
       "      <td>0.012370</td>\n",
       "      <td>-0.016576</td>\n",
       "      <td>0.062942</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_1370</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1370</td>\n",
       "      <td>0.214286</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.114083</td>\n",
       "      <td>0.041070</td>\n",
       "      <td>-0.060834</td>\n",
       "      <td>0.031545</td>\n",
       "      <td>0.020605</td>\n",
       "      <td>0.045850</td>\n",
       "      <td>0.060787</td>\n",
       "      <td>-0.054691</td>\n",
       "      <td>-0.081137</td>\n",
       "      <td>-0.041880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>food_0</td>\n",
       "      <td>disease_1015</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1015</td>\n",
       "      <td>0.202749</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.035130</td>\n",
       "      <td>-0.033633</td>\n",
       "      <td>-0.015799</td>\n",
       "      <td>-0.042921</td>\n",
       "      <td>0.035031</td>\n",
       "      <td>0.071032</td>\n",
       "      <td>-0.097231</td>\n",
       "      <td>0.081445</td>\n",
       "      <td>-0.014732</td>\n",
       "      <td>0.010713</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 602 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  food_id    disease_id  related  food  disease  disease_related_mean  N_0  \\\n",
       "0  food_0   disease_861      0.0     0      861              0.003521  NaN   \n",
       "1  food_0   disease_839      0.0     0      839              0.007299  NaN   \n",
       "2  food_0    disease_50      0.0     0       50              0.018382  NaN   \n",
       "3  food_0  disease_1370      0.0     0     1370              0.214286  NaN   \n",
       "4  food_0  disease_1015      0.0     0     1015              0.202749  NaN   \n",
       "\n",
       "   N_1  N_2  N_3  ...  disease3_svd_118  disease3_svd_119  disease3_svd_120  \\\n",
       "0  NaN  NaN  NaN  ...          0.003554         -0.050520         -0.049823   \n",
       "1  NaN  NaN  NaN  ...         -0.004057          0.011732         -0.012960   \n",
       "2  NaN  NaN  NaN  ...          0.082889         -0.008238         -0.058866   \n",
       "3  NaN  NaN  NaN  ...         -0.114083          0.041070         -0.060834   \n",
       "4  NaN  NaN  NaN  ...         -0.035130         -0.033633         -0.015799   \n",
       "\n",
       "   disease3_svd_121  disease3_svd_122  disease3_svd_123  disease3_svd_124  \\\n",
       "0         -0.017363         -0.116130          0.085801          0.072854   \n",
       "1         -0.005264          0.024481         -0.021813         -0.020103   \n",
       "2          0.072690         -0.040983          0.001735         -0.031011   \n",
       "3          0.031545          0.020605          0.045850          0.060787   \n",
       "4         -0.042921          0.035031          0.071032         -0.097231   \n",
       "\n",
       "   disease3_svd_125  disease3_svd_126  disease3_svd_127  \n",
       "0          0.120381          0.033087         -0.025249  \n",
       "1          0.079306          0.056902         -0.012589  \n",
       "2          0.012370         -0.016576          0.062942  \n",
       "3         -0.054691         -0.081137         -0.041880  \n",
       "4          0.081445         -0.014732          0.010713  \n",
       "\n",
       "[5 rows x 602 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the food features, then each disease feature table, onto `data`.\n",
    "data = data.merge(food, on='food_id', how='left')\n",
    "for _feat_table in (disease_feature1, disease_feature2, disease_feature3):\n",
    "    data = data.merge(_feat_table, on='disease_id', how='left')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 交叉特征\n",
    "\n",
    "这里我们按照特征重要性选取靠前的部分特征进行交叉。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Pairwise arithmetic crosses (+, -, *, /) between the hand-picked columns below.\n",
    "# NOTE(review): the saved feature list printed further down has no 'disease' crosses,\n",
    "# so that output appears to predate adding 'disease' here -- re-run to refresh it.\n",
    "topn = ['N_33', 'N_198', 'N_74','disease','food']\n",
    "for i, left in enumerate(topn):\n",
    "    for right in topn[i + 1:]:\n",
    "        lhs, rhs = data[left], data[right]\n",
    "        data[f'{left}+{right}'] = lhs + rhs\n",
    "        data[f'{left}-{right}'] = lhs - rhs\n",
    "        data[f'{left}*{right}'] = lhs * rhs\n",
    "        # The small constant keeps a zero denominator from dividing by zero.\n",
    "        data[f'{left}/{right}'] = lhs / (rhs + 1e-5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Columns excluded from the model inputs: the two ID columns and the target.\n",
    "drop_cols = [\n",
    "    'disease_id',\n",
    "    'food_id',\n",
    "    'related',\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 特征筛选\n",
    "\n",
    "去除掉只有单一取值的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# A column with fewer than two distinct values cannot separate rows; mark it for removal.\n",
    "drop_cols.extend(col for col in data.columns if data[col].nunique() < 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Rows without a `related` label form the test set; labelled rows form the training set.\n",
    "is_unlabelled = data[\"related\"].isnull()\n",
    "test_df = data[is_unlabelled].copy().reset_index(drop=True)\n",
    "train_df = data[~is_unlabelled].copy().reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "scrolled": true,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "588\n",
      "['food', 'disease', 'disease_related_mean', 'N_0', 'N_1', 'N_3', 'N_4', 'N_5', 'N_6', 'N_7', 'N_9', 'N_10', 'N_11', 'N_13', 'N_14', 'N_16', 'N_17', 'N_18', 'N_19', 'N_20', 'N_22', 'N_28', 'N_30', 'N_33', 'N_35', 'N_37', 'N_40', 'N_42', 'N_43', 'N_44', 'N_45', 'N_46', 'N_47', 'N_48', 'N_49', 'N_50', 'N_51', 'N_52', 'N_53', 'N_54', 'N_55', 'N_56', 'N_57', 'N_58', 'N_59', 'N_60', 'N_61', 'N_62', 'N_63', 'N_64', 'N_67', 'N_68', 'N_69', 'N_71', 'N_72', 'N_73', 'N_74', 'N_75', 'N_76', 'N_77', 'N_78', 'N_79', 'N_80', 'N_81', 'N_82', 'N_84', 'N_85', 'N_86', 'N_87', 'N_88', 'N_89', 'N_90', 'N_91', 'N_92', 'N_93', 'N_94', 'N_95', 'N_96', 'N_97', 'N_98', 'N_99', 'N_100', 'N_101', 'N_102', 'N_104', 'N_105', 'N_106', 'N_109', 'N_111', 'N_112', 'N_113', 'N_114', 'N_115', 'N_116', 'N_118', 'N_119', 'N_120', 'N_121', 'N_122', 'N_123', 'N_124', 'N_125', 'N_126', 'N_127', 'N_128', 'N_129', 'N_131', 'N_132', 'N_133', 'N_134', 'N_135', 'N_136', 'N_138', 'N_139', 'N_140', 'N_141', 'N_142', 'N_145', 'N_146', 'N_147', 'N_148', 'N_149', 'N_151', 'N_152', 'N_153', 'N_154', 'N_155', 'N_156', 'N_157', 'N_158', 'N_160', 'N_161', 'N_162', 'N_163', 'N_164', 'N_165', 'N_166', 'N_167', 'N_168', 'N_169', 'N_170', 'N_171', 'N_172', 'N_173', 'N_174', 'N_175', 'N_176', 'N_177', 'N_178', 'N_179', 'N_180', 'N_181', 'N_182', 'N_183', 'N_184', 'N_185', 'N_187', 'N_188', 'N_190', 'N_191', 'N_192', 'N_193', 'N_194', 'N_195', 'N_196', 'N_197', 'N_198', 'N_199', 'N_200', 'N_201', 'N_202', 'N_203', 'N_204', 'N_205', 'N_206', 'N_207', 'N_208', 'N_209', 'N_210', 'N_211', 'disease1_svd_0', 'disease1_svd_1', 'disease1_svd_2', 'disease1_svd_3', 'disease1_svd_4', 'disease1_svd_5', 'disease1_svd_6', 'disease1_svd_7', 'disease1_svd_8', 'disease1_svd_9', 'disease1_svd_10', 'disease1_svd_11', 'disease1_svd_12', 'disease1_svd_13', 'disease1_svd_14', 'disease1_svd_15', 'disease1_svd_16', 'disease1_svd_17', 'disease1_svd_18', 'disease1_svd_19', 'disease1_svd_20', 'disease1_svd_21', 'disease1_svd_22', 
'disease1_svd_23', 'disease1_svd_24', 'disease1_svd_25', 'disease1_svd_26', 'disease1_svd_27', 'disease1_svd_28', 'disease1_svd_29', 'disease1_svd_30', 'disease1_svd_31', 'disease1_svd_32', 'disease1_svd_33', 'disease1_svd_34', 'disease1_svd_35', 'disease1_svd_36', 'disease1_svd_37', 'disease1_svd_38', 'disease1_svd_39', 'disease1_svd_40', 'disease1_svd_41', 'disease1_svd_42', 'disease1_svd_43', 'disease1_svd_44', 'disease1_svd_45', 'disease1_svd_46', 'disease1_svd_47', 'disease1_svd_48', 'disease1_svd_49', 'disease1_svd_50', 'disease1_svd_51', 'disease1_svd_52', 'disease1_svd_53', 'disease1_svd_54', 'disease1_svd_55', 'disease1_svd_56', 'disease1_svd_57', 'disease1_svd_58', 'disease1_svd_59', 'disease1_svd_60', 'disease1_svd_61', 'disease1_svd_62', 'disease1_svd_63', 'disease1_svd_64', 'disease1_svd_65', 'disease1_svd_66', 'disease1_svd_67', 'disease1_svd_68', 'disease1_svd_69', 'disease1_svd_70', 'disease1_svd_71', 'disease1_svd_72', 'disease1_svd_73', 'disease1_svd_74', 'disease1_svd_75', 'disease1_svd_76', 'disease1_svd_77', 'disease1_svd_78', 'disease1_svd_79', 'disease1_svd_80', 'disease1_svd_81', 'disease1_svd_82', 'disease1_svd_83', 'disease1_svd_84', 'disease1_svd_85', 'disease1_svd_86', 'disease1_svd_87', 'disease1_svd_88', 'disease1_svd_89', 'disease1_svd_90', 'disease1_svd_91', 'disease1_svd_92', 'disease1_svd_93', 'disease1_svd_94', 'disease1_svd_95', 'disease1_svd_96', 'disease1_svd_97', 'disease1_svd_98', 'disease1_svd_99', 'disease1_svd_100', 'disease1_svd_101', 'disease1_svd_102', 'disease1_svd_103', 'disease1_svd_104', 'disease1_svd_105', 'disease1_svd_106', 'disease1_svd_107', 'disease1_svd_108', 'disease1_svd_109', 'disease1_svd_110', 'disease1_svd_111', 'disease1_svd_112', 'disease1_svd_113', 'disease1_svd_114', 'disease1_svd_115', 'disease1_svd_116', 'disease1_svd_117', 'disease1_svd_118', 'disease1_svd_119', 'disease1_svd_120', 'disease1_svd_121', 'disease1_svd_122', 'disease1_svd_123', 'disease1_svd_124', 'disease1_svd_125', 
'disease1_svd_126', 'disease1_svd_127', 'disease2_svd_0', 'disease2_svd_1', 'disease2_svd_2', 'disease2_svd_3', 'disease2_svd_4', 'disease2_svd_5', 'disease2_svd_6', 'disease2_svd_7', 'disease2_svd_8', 'disease2_svd_9', 'disease2_svd_10', 'disease2_svd_11', 'disease2_svd_12', 'disease2_svd_13', 'disease2_svd_14', 'disease2_svd_15', 'disease2_svd_16', 'disease2_svd_17', 'disease2_svd_18', 'disease2_svd_19', 'disease2_svd_20', 'disease2_svd_21', 'disease2_svd_22', 'disease2_svd_23', 'disease2_svd_24', 'disease2_svd_25', 'disease2_svd_26', 'disease2_svd_27', 'disease2_svd_28', 'disease2_svd_29', 'disease2_svd_30', 'disease2_svd_31', 'disease2_svd_32', 'disease2_svd_33', 'disease2_svd_34', 'disease2_svd_35', 'disease2_svd_36', 'disease2_svd_37', 'disease2_svd_38', 'disease2_svd_39', 'disease2_svd_40', 'disease2_svd_41', 'disease2_svd_42', 'disease2_svd_43', 'disease2_svd_44', 'disease2_svd_45', 'disease2_svd_46', 'disease2_svd_47', 'disease2_svd_48', 'disease2_svd_49', 'disease2_svd_50', 'disease2_svd_51', 'disease2_svd_52', 'disease2_svd_53', 'disease2_svd_54', 'disease2_svd_55', 'disease2_svd_56', 'disease2_svd_57', 'disease2_svd_58', 'disease2_svd_59', 'disease2_svd_60', 'disease2_svd_61', 'disease2_svd_62', 'disease2_svd_63', 'disease2_svd_64', 'disease2_svd_65', 'disease2_svd_66', 'disease2_svd_67', 'disease2_svd_68', 'disease2_svd_69', 'disease2_svd_70', 'disease2_svd_71', 'disease2_svd_72', 'disease2_svd_73', 'disease2_svd_74', 'disease2_svd_75', 'disease2_svd_76', 'disease2_svd_77', 'disease2_svd_78', 'disease2_svd_79', 'disease2_svd_80', 'disease2_svd_81', 'disease2_svd_82', 'disease2_svd_83', 'disease2_svd_84', 'disease2_svd_85', 'disease2_svd_86', 'disease2_svd_87', 'disease2_svd_88', 'disease2_svd_89', 'disease2_svd_90', 'disease2_svd_91', 'disease2_svd_92', 'disease2_svd_93', 'disease2_svd_94', 'disease2_svd_95', 'disease2_svd_96', 'disease2_svd_97', 'disease2_svd_98', 'disease2_svd_99', 'disease2_svd_100', 'disease2_svd_101', 'disease2_svd_102', 
'disease2_svd_103', 'disease2_svd_104', 'disease2_svd_105', 'disease2_svd_106', 'disease2_svd_107', 'disease2_svd_108', 'disease2_svd_109', 'disease2_svd_110', 'disease2_svd_111', 'disease2_svd_112', 'disease2_svd_113', 'disease2_svd_114', 'disease2_svd_115', 'disease2_svd_116', 'disease2_svd_117', 'disease2_svd_118', 'disease2_svd_119', 'disease2_svd_120', 'disease2_svd_121', 'disease2_svd_122', 'disease2_svd_123', 'disease2_svd_124', 'disease2_svd_125', 'disease2_svd_126', 'disease2_svd_127', 'disease3_svd_0', 'disease3_svd_1', 'disease3_svd_2', 'disease3_svd_3', 'disease3_svd_4', 'disease3_svd_5', 'disease3_svd_6', 'disease3_svd_7', 'disease3_svd_8', 'disease3_svd_9', 'disease3_svd_10', 'disease3_svd_11', 'disease3_svd_12', 'disease3_svd_13', 'disease3_svd_14', 'disease3_svd_15', 'disease3_svd_16', 'disease3_svd_17', 'disease3_svd_18', 'disease3_svd_19', 'disease3_svd_20', 'disease3_svd_21', 'disease3_svd_22', 'disease3_svd_23', 'disease3_svd_24', 'disease3_svd_25', 'disease3_svd_26', 'disease3_svd_27', 'disease3_svd_28', 'disease3_svd_29', 'disease3_svd_30', 'disease3_svd_31', 'disease3_svd_32', 'disease3_svd_33', 'disease3_svd_34', 'disease3_svd_35', 'disease3_svd_36', 'disease3_svd_37', 'disease3_svd_38', 'disease3_svd_39', 'disease3_svd_40', 'disease3_svd_41', 'disease3_svd_42', 'disease3_svd_43', 'disease3_svd_44', 'disease3_svd_45', 'disease3_svd_46', 'disease3_svd_47', 'disease3_svd_48', 'disease3_svd_49', 'disease3_svd_50', 'disease3_svd_51', 'disease3_svd_52', 'disease3_svd_53', 'disease3_svd_54', 'disease3_svd_55', 'disease3_svd_56', 'disease3_svd_57', 'disease3_svd_58', 'disease3_svd_59', 'disease3_svd_60', 'disease3_svd_61', 'disease3_svd_62', 'disease3_svd_63', 'disease3_svd_64', 'disease3_svd_65', 'disease3_svd_66', 'disease3_svd_67', 'disease3_svd_68', 'disease3_svd_69', 'disease3_svd_70', 'disease3_svd_71', 'disease3_svd_72', 'disease3_svd_73', 'disease3_svd_74', 'disease3_svd_75', 'disease3_svd_76', 'disease3_svd_77', 'disease3_svd_78', 
'disease3_svd_79', 'disease3_svd_80', 'disease3_svd_81', 'disease3_svd_82', 'disease3_svd_83', 'disease3_svd_84', 'disease3_svd_85', 'disease3_svd_86', 'disease3_svd_87', 'disease3_svd_88', 'disease3_svd_89', 'disease3_svd_90', 'disease3_svd_91', 'disease3_svd_92', 'disease3_svd_93', 'disease3_svd_94', 'disease3_svd_95', 'disease3_svd_96', 'disease3_svd_97', 'disease3_svd_98', 'disease3_svd_99', 'disease3_svd_100', 'disease3_svd_101', 'disease3_svd_102', 'disease3_svd_103', 'disease3_svd_104', 'disease3_svd_105', 'disease3_svd_106', 'disease3_svd_107', 'disease3_svd_108', 'disease3_svd_109', 'disease3_svd_110', 'disease3_svd_111', 'disease3_svd_112', 'disease3_svd_113', 'disease3_svd_114', 'disease3_svd_115', 'disease3_svd_116', 'disease3_svd_117', 'disease3_svd_118', 'disease3_svd_119', 'disease3_svd_120', 'disease3_svd_121', 'disease3_svd_122', 'disease3_svd_123', 'disease3_svd_124', 'disease3_svd_125', 'disease3_svd_126', 'disease3_svd_127', 'N_33+N_198', 'N_33-N_198', 'N_33*N_198', 'N_33/N_198', 'N_33+N_74', 'N_33-N_74', 'N_33*N_74', 'N_33/N_74', 'N_33+food', 'N_33-food', 'N_33*food', 'N_33/food', 'N_198+N_74', 'N_198-N_74', 'N_198*N_74', 'N_198/N_74', 'N_198+food', 'N_198-food', 'N_198*food', 'N_198/food', 'N_74+food', 'N_74-food', 'N_74*food', 'N_74/food']\n"
     ]
    }
   ],
   "source": [
    "# Model inputs are every column that was not marked for dropping.\n",
    "feature_name = [col for col in train_df.columns if col not in drop_cols]\n",
    "\n",
    "# Target vector and the aligned train / test feature matrices.\n",
    "y = train_df['related'].reset_index(drop=True)\n",
    "X_train = train_df[feature_name].reset_index(drop=True)\n",
    "X_test = test_df[feature_name].reset_index(drop=True)\n",
    "\n",
    "print(len(feature_name))\n",
    "print(feature_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(46805, 626)\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: (rows, columns) of the unlabelled split.\n",
    "print(test_df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 模型训练\n",
    "\n",
    "本次仅使用lightgbm模型来训练。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Prediction stores for the training rows and the test rows, keyed by model name.\n",
    "train_pred, test_pred = {}, {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": true,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 --------------------------------------------------------------------------------------------\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's auc: 0.949458\n",
      "[200]\tvalid_0's auc: 0.959777\n",
      "[300]\tvalid_0's auc: 0.964304\n",
      "[400]\tvalid_0's auc: 0.967173\n",
      "[500]\tvalid_0's auc: 0.969298\n",
      "[600]\tvalid_0's auc: 0.970734\n",
      "[700]\tvalid_0's auc: 0.97184\n",
      "[800]\tvalid_0's auc: 0.972792\n",
      "[900]\tvalid_0's auc: 0.973685\n",
      "[1000]\tvalid_0's auc: 0.97433\n",
      "[1100]\tvalid_0's auc: 0.97489\n",
      "[1200]\tvalid_0's auc: 0.975442\n",
      "[1300]\tvalid_0's auc: 0.975927\n",
      "[1400]\tvalid_0's auc: 0.976211\n",
      "[1500]\tvalid_0's auc: 0.976626\n",
      "[1600]\tvalid_0's auc: 0.976887\n",
      "[1700]\tvalid_0's auc: 0.977127\n",
      "[1800]\tvalid_0's auc: 0.977394\n",
      "[1900]\tvalid_0's auc: 0.977593\n",
      "[2000]\tvalid_0's auc: 0.97781\n",
      "[2100]\tvalid_0's auc: 0.977997\n",
      "[2200]\tvalid_0's auc: 0.978176\n",
      "[2300]\tvalid_0's auc: 0.978341\n",
      "[2400]\tvalid_0's auc: 0.978489\n",
      "[2500]\tvalid_0's auc: 0.978705\n",
      "[2600]\tvalid_0's auc: 0.978843\n",
      "[2700]\tvalid_0's auc: 0.979005\n",
      "[2800]\tvalid_0's auc: 0.979131\n",
      "[2900]\tvalid_0's auc: 0.979242\n",
      "[3000]\tvalid_0's auc: 0.979413\n",
      "[3100]\tvalid_0's auc: 0.979516\n",
      "[3200]\tvalid_0's auc: 0.979645\n",
      "[3300]\tvalid_0's auc: 0.979739\n",
      "[3400]\tvalid_0's auc: 0.979784\n",
      "[3500]\tvalid_0's auc: 0.979875\n",
      "[3600]\tvalid_0's auc: 0.979941\n",
      "[3700]\tvalid_0's auc: 0.980045\n",
      "[3800]\tvalid_0's auc: 0.980099\n",
      "[3900]\tvalid_0's auc: 0.980162\n",
      "[4000]\tvalid_0's auc: 0.980287\n",
      "[4100]\tvalid_0's auc: 0.980348\n",
      "[4200]\tvalid_0's auc: 0.980369\n",
      "[4300]\tvalid_0's auc: 0.980425\n",
      "[4400]\tvalid_0's auc: 0.98051\n",
      "[4500]\tvalid_0's auc: 0.98055\n",
      "[4600]\tvalid_0's auc: 0.980603\n",
      "[4700]\tvalid_0's auc: 0.980674\n",
      "[4800]\tvalid_0's auc: 0.980699\n",
      "[4900]\tvalid_0's auc: 0.980728\n",
      "[5000]\tvalid_0's auc: 0.980775\n",
      "[5100]\tvalid_0's auc: 0.980817\n",
      "[5200]\tvalid_0's auc: 0.980853\n",
      "[5300]\tvalid_0's auc: 0.980893\n",
      "[5400]\tvalid_0's auc: 0.980918\n",
      "[5500]\tvalid_0's auc: 0.980933\n",
      "[5600]\tvalid_0's auc: 0.980948\n",
      "[5700]\tvalid_0's auc: 0.980981\n",
      "[5800]\tvalid_0's auc: 0.98097\n",
      "Early stopping, best iteration is:\n",
      "[5722]\tvalid_0's auc: 0.980989\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's auc: 0.950027\n",
      "[200]\tvalid_0's auc: 0.959938\n",
      "[300]\tvalid_0's auc: 0.964937\n",
      "[400]\tvalid_0's auc: 0.967805\n",
      "[500]\tvalid_0's auc: 0.969622\n",
      "[600]\tvalid_0's auc: 0.971088\n",
      "[700]\tvalid_0's auc: 0.9722\n",
      "[800]\tvalid_0's auc: 0.972828\n",
      "[900]\tvalid_0's auc: 0.973659\n",
      "[1000]\tvalid_0's auc: 0.974214\n",
      "[1100]\tvalid_0's auc: 0.974677\n",
      "[1200]\tvalid_0's auc: 0.975196\n",
      "[1300]\tvalid_0's auc: 0.975674\n",
      "[1400]\tvalid_0's auc: 0.976063\n",
      "[1500]\tvalid_0's auc: 0.976356\n",
      "[1600]\tvalid_0's auc: 0.976684\n",
      "[1700]\tvalid_0's auc: 0.976991\n",
      "[1800]\tvalid_0's auc: 0.977259\n",
      "[1900]\tvalid_0's auc: 0.977462\n",
      "[2000]\tvalid_0's auc: 0.97767\n",
      "[2100]\tvalid_0's auc: 0.977802\n",
      "[2200]\tvalid_0's auc: 0.977987\n",
      "[2300]\tvalid_0's auc: 0.978169\n",
      "[2400]\tvalid_0's auc: 0.978243\n",
      "[2500]\tvalid_0's auc: 0.978419\n",
      "[2600]\tvalid_0's auc: 0.978477\n",
      "[2700]\tvalid_0's auc: 0.978578\n",
      "[2800]\tvalid_0's auc: 0.978721\n",
      "[2900]\tvalid_0's auc: 0.978796\n",
      "[3000]\tvalid_0's auc: 0.978889\n",
      "[3100]\tvalid_0's auc: 0.978985\n",
      "[3200]\tvalid_0's auc: 0.979063\n",
      "[3300]\tvalid_0's auc: 0.9791\n",
      "[3400]\tvalid_0's auc: 0.979187\n",
      "[3500]\tvalid_0's auc: 0.979283\n",
      "[3600]\tvalid_0's auc: 0.979396\n",
      "[3700]\tvalid_0's auc: 0.979473\n",
      "[3800]\tvalid_0's auc: 0.979555\n",
      "[3900]\tvalid_0's auc: 0.979695\n",
      "[4000]\tvalid_0's auc: 0.97974\n",
      "[4100]\tvalid_0's auc: 0.979777\n",
      "[4200]\tvalid_0's auc: 0.979819\n",
      "[4300]\tvalid_0's auc: 0.979893\n",
      "[4400]\tvalid_0's auc: 0.979946\n",
      "[4500]\tvalid_0's auc: 0.979963\n",
      "[4600]\tvalid_0's auc: 0.98001\n",
      "[4700]\tvalid_0's auc: 0.980054\n",
      "[4800]\tvalid_0's auc: 0.980072\n",
      "[4900]\tvalid_0's auc: 0.980102\n",
      "[5000]\tvalid_0's auc: 0.980103\n",
      "Early stopping, best iteration is:\n",
      "[4930]\tvalid_0's auc: 0.980111\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's auc: 0.945509\n",
      "[200]\tvalid_0's auc: 0.957126\n",
      "[300]\tvalid_0's auc: 0.962499\n",
      "[400]\tvalid_0's auc: 0.966045\n",
      "[500]\tvalid_0's auc: 0.968341\n",
      "[600]\tvalid_0's auc: 0.969859\n",
      "[700]\tvalid_0's auc: 0.971038\n",
      "[800]\tvalid_0's auc: 0.972075\n",
      "[900]\tvalid_0's auc: 0.972867\n",
      "[1000]\tvalid_0's auc: 0.973547\n",
      "[1100]\tvalid_0's auc: 0.974153\n",
      "[1200]\tvalid_0's auc: 0.974683\n",
      "[1300]\tvalid_0's auc: 0.97523\n",
      "[1400]\tvalid_0's auc: 0.975676\n",
      "[1500]\tvalid_0's auc: 0.976042\n",
      "[1600]\tvalid_0's auc: 0.976402\n",
      "[1700]\tvalid_0's auc: 0.976655\n",
      "[1800]\tvalid_0's auc: 0.976866\n",
      "[1900]\tvalid_0's auc: 0.977145\n",
      "[2000]\tvalid_0's auc: 0.977343\n",
      "[2100]\tvalid_0's auc: 0.977578\n",
      "[2200]\tvalid_0's auc: 0.977792\n",
      "[2300]\tvalid_0's auc: 0.977984\n",
      "[2400]\tvalid_0's auc: 0.978142\n",
      "[2500]\tvalid_0's auc: 0.97833\n",
      "[2600]\tvalid_0's auc: 0.978463\n",
      "[2700]\tvalid_0's auc: 0.978588\n",
      "[2800]\tvalid_0's auc: 0.978679\n",
      "[2900]\tvalid_0's auc: 0.978795\n",
      "[3000]\tvalid_0's auc: 0.978912\n",
      "[3100]\tvalid_0's auc: 0.979047\n",
      "[3200]\tvalid_0's auc: 0.979147\n",
      "[3300]\tvalid_0's auc: 0.979257\n",
      "[3400]\tvalid_0's auc: 0.979326\n",
      "[3500]\tvalid_0's auc: 0.979429\n",
      "[3600]\tvalid_0's auc: 0.979481\n",
      "[3700]\tvalid_0's auc: 0.979539\n",
      "[3800]\tvalid_0's auc: 0.979622\n",
      "[3900]\tvalid_0's auc: 0.979647\n",
      "[4000]\tvalid_0's auc: 0.979732\n",
      "[4100]\tvalid_0's auc: 0.97979\n",
      "[4200]\tvalid_0's auc: 0.979843\n",
      "[4300]\tvalid_0's auc: 0.979924\n",
      "[4400]\tvalid_0's auc: 0.98001\n",
      "[4500]\tvalid_0's auc: 0.98002\n",
      "[4600]\tvalid_0's auc: 0.98006\n",
      "[4700]\tvalid_0's auc: 0.98014\n",
      "[4800]\tvalid_0's auc: 0.980186\n",
      "[4900]\tvalid_0's auc: 0.980242\n",
      "[5000]\tvalid_0's auc: 0.98027\n",
      "[5100]\tvalid_0's auc: 0.980302\n",
      "[5200]\tvalid_0's auc: 0.980306\n",
      "[5300]\tvalid_0's auc: 0.980371\n",
      "[5400]\tvalid_0's auc: 0.980391\n",
      "Early stopping, best iteration is:\n",
      "[5389]\tvalid_0's auc: 0.980398\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's auc: 0.952301\n",
      "[200]\tvalid_0's auc: 0.962685\n",
      "[300]\tvalid_0's auc: 0.967336\n",
      "[400]\tvalid_0's auc: 0.970809\n",
      "[500]\tvalid_0's auc: 0.972868\n",
      "[600]\tvalid_0's auc: 0.97449\n",
      "[700]\tvalid_0's auc: 0.975652\n",
      "[800]\tvalid_0's auc: 0.976443\n",
      "[900]\tvalid_0's auc: 0.97732\n",
      "[1000]\tvalid_0's auc: 0.97785\n",
      "[1100]\tvalid_0's auc: 0.978349\n",
      "[1200]\tvalid_0's auc: 0.978761\n",
      "[1300]\tvalid_0's auc: 0.979175\n",
      "[1400]\tvalid_0's auc: 0.979587\n",
      "[1500]\tvalid_0's auc: 0.979879\n",
      "[1600]\tvalid_0's auc: 0.980247\n",
      "[1700]\tvalid_0's auc: 0.980573\n",
      "[1800]\tvalid_0's auc: 0.980804\n",
      "[1900]\tvalid_0's auc: 0.981015\n",
      "[2000]\tvalid_0's auc: 0.981178\n",
      "[2100]\tvalid_0's auc: 0.981371\n",
      "[2200]\tvalid_0's auc: 0.981576\n",
      "[2300]\tvalid_0's auc: 0.98171\n",
      "[2400]\tvalid_0's auc: 0.981883\n",
      "[2500]\tvalid_0's auc: 0.98202\n",
      "[2600]\tvalid_0's auc: 0.982172\n",
      "[2700]\tvalid_0's auc: 0.982304\n",
      "[2800]\tvalid_0's auc: 0.982403\n",
      "[2900]\tvalid_0's auc: 0.982475\n",
      "[3000]\tvalid_0's auc: 0.9826\n",
      "[3100]\tvalid_0's auc: 0.982683\n",
      "[3200]\tvalid_0's auc: 0.982782\n",
      "[3300]\tvalid_0's auc: 0.982876\n",
      "[3400]\tvalid_0's auc: 0.982965\n",
      "[3500]\tvalid_0's auc: 0.983003\n",
      "[3600]\tvalid_0's auc: 0.983101\n",
      "[3700]\tvalid_0's auc: 0.983161\n",
      "[3800]\tvalid_0's auc: 0.983212\n",
      "[3900]\tvalid_0's auc: 0.983263\n",
      "[4000]\tvalid_0's auc: 0.983266\n",
      "[4100]\tvalid_0's auc: 0.983364\n",
      "[4200]\tvalid_0's auc: 0.983358\n",
      "[4300]\tvalid_0's auc: 0.983397\n",
      "[4400]\tvalid_0's auc: 0.983459\n",
      "[4500]\tvalid_0's auc: 0.983501\n",
      "Early stopping, best iteration is:\n",
      "[4470]\tvalid_0's auc: 0.983514\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's auc: 0.95199\n",
      "[200]\tvalid_0's auc: 0.962906\n",
      "[300]\tvalid_0's auc: 0.967527\n",
      "[400]\tvalid_0's auc: 0.970826\n",
      "[500]\tvalid_0's auc: 0.97282\n",
      "[600]\tvalid_0's auc: 0.974122\n",
      "[700]\tvalid_0's auc: 0.974963\n",
      "[800]\tvalid_0's auc: 0.976001\n",
      "[900]\tvalid_0's auc: 0.976629\n",
      "[1000]\tvalid_0's auc: 0.977395\n",
      "[1100]\tvalid_0's auc: 0.977824\n",
      "[1200]\tvalid_0's auc: 0.978371\n",
      "[1300]\tvalid_0's auc: 0.978707\n",
      "[1400]\tvalid_0's auc: 0.97908\n",
      "[1500]\tvalid_0's auc: 0.979361\n",
      "[1600]\tvalid_0's auc: 0.979649\n",
      "[1700]\tvalid_0's auc: 0.979947\n",
      "[1800]\tvalid_0's auc: 0.980176\n",
      "[1900]\tvalid_0's auc: 0.980395\n",
      "[2000]\tvalid_0's auc: 0.980537\n",
      "[2100]\tvalid_0's auc: 0.980691\n",
      "[2200]\tvalid_0's auc: 0.980871\n",
      "[2300]\tvalid_0's auc: 0.981015\n",
      "[2400]\tvalid_0's auc: 0.981173\n",
      "[2500]\tvalid_0's auc: 0.981276\n",
      "[2600]\tvalid_0's auc: 0.981401\n",
      "[2700]\tvalid_0's auc: 0.981446\n",
      "[2800]\tvalid_0's auc: 0.98157\n",
      "[2900]\tvalid_0's auc: 0.981658\n",
      "[3000]\tvalid_0's auc: 0.981744\n",
      "[3100]\tvalid_0's auc: 0.981874\n",
      "[3200]\tvalid_0's auc: 0.981953\n",
      "[3300]\tvalid_0's auc: 0.982039\n",
      "[3400]\tvalid_0's auc: 0.982099\n",
      "[3500]\tvalid_0's auc: 0.982162\n",
      "[3600]\tvalid_0's auc: 0.982221\n",
      "[3700]\tvalid_0's auc: 0.982283\n",
      "[3800]\tvalid_0's auc: 0.982314\n",
      "[3900]\tvalid_0's auc: 0.982407\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4000]\tvalid_0's auc: 0.982491\n",
      "[4100]\tvalid_0's auc: 0.982525\n",
      "[4200]\tvalid_0's auc: 0.982581\n",
      "[4300]\tvalid_0's auc: 0.982648\n",
      "[4400]\tvalid_0's auc: 0.982711\n",
      "[4500]\tvalid_0's auc: 0.982756\n",
      "[4600]\tvalid_0's auc: 0.982764\n",
      "[4700]\tvalid_0's auc: 0.982788\n",
      "[4800]\tvalid_0's auc: 0.9828\n",
      "[4900]\tvalid_0's auc: 0.982854\n",
      "[5000]\tvalid_0's auc: 0.982886\n",
      "[5100]\tvalid_0's auc: 0.982885\n",
      "[5200]\tvalid_0's auc: 0.982911\n",
      "[5300]\tvalid_0's auc: 0.982922\n",
      "[5400]\tvalid_0's auc: 0.982968\n",
      "[5500]\tvalid_0's auc: 0.982967\n",
      "[5600]\tvalid_0's auc: 0.983015\n",
      "[5700]\tvalid_0's auc: 0.983044\n",
      "Early stopping, best iteration is:\n",
      "[5651]\tvalid_0's auc: 0.983049\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "53521"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stratified K-fold LightGBM training, repeated once per entry in `seeds`.\n",
    "# `oof` collects out-of-fold predictions for the training rows; `prediction` is the\n",
    "# test-set prediction averaged over folds and seeds.\n",
    "seeds = [2]\n",
    "# Derived from `seeds` so that adding a seed to the list really trains another round\n",
    "# and the per-seed averaging below stays correct (previously hard-coded to 1).\n",
    "num_model_seed = len(seeds)\n",
    "oof = np.zeros(X_train.shape[0])\n",
    "prediction = np.zeros(X_test.shape[0])\n",
    "# Feature importances are summed over every fold of every seed.\n",
    "feat_imp_df = pd.DataFrame({'feats': feature_name, 'imp': 0})\n",
    "parameters = {\n",
    "    'learning_rate': 0.01,\n",
    "    'boosting_type': 'gbdt',\n",
    "    'objective': 'binary',\n",
    "    'metric': 'auc',\n",
    "    'num_leaves': 63,\n",
    "    'feature_fraction': 0.8,\n",
    "    'bagging_fraction': 0.8,\n",
    "    'bagging_freq': 5,\n",
    "    'seed': 2022,\n",
    "    'bagging_seed': 1,\n",
    "    'feature_fraction_seed': 7,\n",
    "    'min_data_in_leaf': 20,\n",
    "    'verbose': -1, \n",
    "    'n_jobs':8\n",
    "}\n",
    "fold = 5\n",
    "for model_seed in range(num_model_seed):\n",
    "    print(seeds[model_seed],\"--------------------------------------------------------------------------------------------\")\n",
    "    oof_cat = np.zeros(X_train.shape[0])\n",
    "    prediction_cat = np.zeros(X_test.shape[0])\n",
    "    # The seed only controls how rows are assigned to folds; the LightGBM seeds in\n",
    "    # `parameters` stay fixed across rounds.\n",
    "    skf = StratifiedKFold(n_splits=fold, random_state=seeds[model_seed], shuffle=True)\n",
    "    for index, (train_index, test_index) in enumerate(skf.split(X_train, y)):\n",
    "        train_x, test_x, train_y, test_y = X_train[feature_name].iloc[train_index], X_train[feature_name].iloc[test_index], y.iloc[train_index], y.iloc[test_index]\n",
    "        dtrain = lgb.Dataset(train_x, label=train_y)\n",
    "        dval = lgb.Dataset(test_x, label=test_y)\n",
    "        # Train up to 10000 rounds, stopping once validation AUC stalls for 100 rounds.\n",
    "        lgb_model = lgb.train(\n",
    "            parameters,\n",
    "            dtrain,\n",
    "            num_boost_round=10000,\n",
    "            valid_sets=[dval],\n",
    "            early_stopping_rounds=100,\n",
    "            verbose_eval=100, )\n",
    "        # Each training row is in exactly one validation fold, so this fills it once per seed.\n",
    "        oof_cat[test_index] += lgb_model.predict(test_x,num_iteration=lgb_model.best_iteration)\n",
    "        # Test predictions are averaged over the folds.\n",
    "        prediction_cat += lgb_model.predict(X_test,num_iteration=lgb_model.best_iteration) / fold\n",
    "        feat_imp_df['imp'] += lgb_model.feature_importance()\n",
    "\n",
    "        # Drop the per-fold objects before the next fold allocates its own.\n",
    "        del train_x\n",
    "        del test_x\n",
    "        del train_y\n",
    "        del test_y\n",
    "        del lgb_model\n",
    "    # Average the per-seed results.\n",
    "    oof += oof_cat / num_model_seed\n",
    "    prediction += prediction_cat / num_model_seed\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Register the LightGBM out-of-fold and test predictions under the 'lgb' key.\n",
    "train_pred['lgb'], test_pred['lgb'] = oof, prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lgb train auc:  0.9815072860684189\n"
     ]
    }
   ],
   "source": [
    "# `train_pred['lgb']` holds out-of-fold predictions, so despite the label this AUC is a\n",
    "# cross-validated score rather than an in-sample training score.\n",
    "print(\"lgb train auc: \", roc_auc_score(y_true=y, y_score=train_pred['lgb']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "scrolled": true,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.40, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.60, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, "
     ]
    }
   ],
   "source": [
    "scores = []; thresholds = []\n",
    "best_score = 0; best_threshold = 0\n",
    "\n",
    "for threshold in np.arange(0.1,0.9,0.01):\n",
    "    print(f'{threshold:.02f}, ',end='')\n",
    "    preds = (train_pred['lgb'].reshape((-1)) > threshold).astype('int')\n",
    "    m = f1_score(y.values.reshape((-1)), preds, average='binary')   \n",
    "    scores.append(m)\n",
    "    thresholds.append(threshold)\n",
    "    if m>best_score:\n",
    "        best_score = m\n",
    "        best_threshold = threshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABJkAAAFVCAYAAABM2D5DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABfSElEQVR4nO3dd5xcVfn48c+ThEAiEFooEpLQRKpgAghYKIKg0hGBVYolCsj3J4goBqUGrAgIogEpSgAREVFURCEgnSAdBAMkgSASekkh5fz+OHfcyezs7mx2d2bL5/163dfs3Hvn3mdm7sze+8w5z4mUEpIkSZIkSVJnDGh0AJIkSZIkSer9TDJJkiRJkiSp00wySZIkSZIkqdNMMkmSJEmSJKnTTDJJkiRJkiSp00wySZIkSZIkqdNMMknqchGxfUSkiDi00bFU6s7YImJyRExrdBzq+zp6/Hi8SeorevL3mecYkmSSSVINihOVWqfRjY5XXaud9/ubZettFRHnRMTtEfFWV5zgRsQ6ETExIv4VEbMj4tWIeDwiLo2IHTr95PqQiBgdESdFxObduI/K939BRMyMiD9HxC7dtd+y/W9ePMfRHXjMSW0cv3Mr1j0+In4TEU8Xy6d1Mt6BEfHZiLgtIl6IiLkR8VxE3BwRp0TE0p3Zfl8VERtExLXF5/3tiPhHROzYwW1sExHXFa/3nIh4KiIuiIh1Ktb7QERcHRFTI+LNYnokIk6MiGEV6y4TEV+MiN9HxLRiu09HxBURsWFXPPcqz2OviDipg4+ZXOVYfyki7o6IL0fEwO6ItWz/KxSfu+1rXN9zjD6os5/jiPhacSz/JyLmFbc3R8TebTxmpYj4YfF5nhsRs4rHfKhrnpWkWgxqdACSeoXPVtz/EDAOmAj8o2LZLGB0HWJSfT0A/KjK/PvL/v44cCTwL+BBYNvO7DAixgK3APOBXwKPAkOA9YFdgDeBmzuzj17sVvJrMb9s3mjgRGAa+f3qLg/QfCwsVez3C8ANEbFvSumabtz35uTnOJn8PDviO8AzFfMWVtw/HXgF+CewQkeDq+JyYH/gdvJr9iqwFvB+4DjgHGBeF+ynz4iIdYE7gAXA94HXgS+Sj6/dUkp/q2EbuwLXA08B5wIvARuT/2/tGxGbppRmFqu/BxgKTAKeJ/8AuyUwHtgvIrZKKc0p1h1N/r93G/CLYv11gMOBfSJi15RSV38n7QUcApzUwcfNI38uAQJYDTgAOB/YEPh/XRNeVSuQP6eQP6vt8Ryjj+mKzzGwFfl7/k/kz/BKwKeAayLiOymlUyv2OYp8vC1L/nw+CQwDNgPW7PyzklQrk0yS2pVSuqz8fkQMIp8A3lm5rFje6X1GxHIppTc7vSF1lZnV3usK5wM/SCm9HRH70ckkE/kiZSiweUrpwcqFEbF6J7e/RHrCsZlSWgTMbXfF7tHiWIiI35ITi4cA3Zlk6ow/p5SmtLPOuimlpwEi4hHyxcoSiYgx5ATT71JK+1RZvjLwxpJuvxNxDQHmp5QW1HvfNTqDnKQYk1J6ACAiSknm8yLivSml1M42jiYnELdNKb1UmhkRjwIXkC9UzwJIKf2SnMQud35EPE6+ON4duKqYPwvYohRX2XYnkRPuPwDG1v5Uu9WCKp/Tc4GngUPp3iRTh3iO0Sd1+nOcUvp05byIOAu4DzguIk5PKZX/UHAZ+dp2s5TSf7riSUhaMnaXk9StIuKwiHi0aOo8PSKOq7LOtKJJ9BYRcUNEvA48VLZ8/Yj4VdFU+p1i/R9ExLsqtrNWRFxU7GdeRLwYEXdExCFLGlux3l6Ru4C9Hbkb2O0RsWcHXoM9I+L+oun2sxFxKrkFSC2P/V7RRWCzKsuGRe6ucW3ZvE9ExC2Ru0bMiYgZEXFNRLyn1niXVErpvymlt7twk+sDL1dLMBX7e6FyXkTsEBHXR8TLxev9dET8IiJWKVtnUER8IyIeK9Z5OSJ+FxGbVmxr
dPHanxQRn46I+yJiDvCTsnU+GhF/jYjXim09FBFfruXJRcQzEXFLxbzji33+vmJ+6ThYrbi/WL2N4rbUguLiaO5aMrnKfms67jvo+eL2nSr7G1u8vi8V+3wiIsYXF5Ll620cuavazGK9FyJ3c/hEsfwk4OJi9ZvLnuMlXRA/AKUEUxdZv7i9qZV9vZxSKm+JRkQsHxETIncJLR2bt0XEARXrbVa8pqXj/LGIOC4qukFFxCXFazS8+G78L/A2MKJYPqw4tqYWr/msyF2/FutSVi+Rv9P3ACaXJ3JSSm8BF5JbHW1Zw6aWJydhX62YXzpOa/meml7crlgWx8uVCaZi/mPAI8AmNWy31LX4koh4MnI34DeL/yt7V6w3mZy4rexSdmgt+6kS51xyS71qn9M1IuL84n/GOxHxfOSuyqtWrLdSRPw4cvfD0jF6X0R8vVi+Pc0tBk8si3naksTcnlq+z8JzjLrqws9xC0VyfCbwLsqeY0R8GPgg8P2U0n8iYqmIGLrET0JSp9iSSVJ3+jK5if4vgNeAzwDfi4jnUkqXV6w7knwx9hvgtxQtCCK3BripePzPyScX7wP+D9guIj6SUppfXLDeSG4S/VMWbyb9IeDSJYktIo4AziN3ATulmH0ocG1EfCmlNLGtF6C4aPgtucn3KeSm44cBn2jrcWUuJXerORg4tmLZ/sAypecWER8BriNf7JxRPK93Ax8F1iO/JktqqShL1BQWpZRe6cQ22/MUsEFE7FNLF6yI+BK5NdXM4nY6+bjanXxRXWrRMIn82t1YrLc6uZvfnRHxoZTS/RWb3ot8vJ0P/Iyi9UlEjCvu3wVMIF+47kxuBbFuSunr7YR8E/CZiBhS1h1nJ2AR8JGIGFj2K+2OwGMppf+2sq1byV29vsXiXUwq1+/IZ7I15cfCIGAU8G1yy5FflK8YOUF0DTCV3F3sFWAb8mdhc3KLklKrnlIy5mfk924VcquQrcldn64B1iC3cDgdeLxY/6ka4x5W5Rh+q7jw7g6luD4VEZNSSpUJj8VExArkblgbA1eTj7eBwBbAJ4Eri/XKu5GeB7xAPsa/R/5ubKqy+RuL9U4lX5y9Fbne0B3kz8hF5BYGawBHAHdHxNiU0vQq2yqPeWlgubbWKbOwvdeA/H29NHBnlWV3FbdbAve0s50bgA8Al0bED8if/U3Ix+DjFK9lueKCtDSNIb+e7wC1dM8bQH7tWvt8VtobeC+5hdR0YGWKVoAR0VT2WZxA/kH4QyzepeyOWnZSdrwHMLzYx8bFdsvXG0l+zQeTP8NPkf9nHA7sUBwLrxer/wb4MPlz+hC52+6GwPbkllyPk1uS/Rj4Hc0tG9+qJeYO8hyj8+cYPflzXIpvJfJ34Srk/xm7AjdXfHd/vLidERF/AHYDBkbEv4FTamiJLakrpZScnJycOjSRT4AScGgry7cvlj8PDCubP5Tc3eDOivWnFet/ocq2HiSffC1XMX/v8hjIJzUJOK6d2GuOjfwL9lvkC+Tly+YvTz4JfxNYoWz+ZGBa2f2BwAzyBc4qZfOHkS8sWn0NK2K+t4h3YMX8fxTbHlzcP7PY5qpd/H6nVqYX2njMfrU+vza2sQ35Ii+RT+gvIl/0bFhl3RHkGiSPlb8nZcsHFLc7F9v7NRBly99HPjn/R9m80cW68yv3Sb6gnAtcXmVfZ5MTLuu08/yaiu3vXNxfGpgN/KqYv1XZ8bIA+EmV4/jQtuYt6WdyCY6FV4A9KtZdhpzYuBUYVLHs6OJx2xf39yju79/O/g8tf1yNMZ/URtxfbuNxj1D2eV7CY/i6Yj9vky9QTyMnhIZWWfenxbrjWjt+i79vL46HzcrmBTlhkYCdyuZfUsy7rJXjdA7wvor5o8iJ1EtqeH6l96OWqd3XEti3WPfwKss2KpadXsN2li5ez7kVMVxP2Xd5xWN+WLHuI8AuNb7PRxSPOaXG9d9VZd5Q4AlyMrl8/iVA6uBxN7mV92ABcFKV9X8PvAiM
qJg/tvwx5O+iBPy0nf2PLtZrsa8a4y8dV4e2snx7PMeArjvHKL3ePepzXPaYl8r2P5+cKBxesc7viuUvkr8jm8jJtkeK+YctybHo5OS0ZJPd5SR1p4tT86+fpJRmk3/FWr/Kuq/Q3BUGgMjdlzYjF89dOiJWKU3kX/zfJheAhlxUEvKvros17+9EbDuTf/U/J6X0Rtm6b5AL9i5LbiXUmjHkIr8Xp7K6IMV+f1ZDjCWXkpMaO5dmRMTawHbAFSmlUteH0vPZNyq6InWBu4v9l0+f6uJ9LCaldCf5NbyUfNJ8GPnC8bGIuDUW79LzKfKv8CenlF6rsq1FxZ+l7igTUkqpbPmDwB+AD0bE8IqHX59Serxi3n7kC9lflB+XxbH5B3Lrg7aODWhuuVMabWcbcquAUpHUnYr5HyFfTFTtdtVBHflMtqb8WNiV3LJoOnBlRHysbL2dyb/kXwysUPEa/alYp/Lzu1tELN/B51SrI2l5DP+hm/ZVsi9wFPlCZ3tyMenrgBci4mullYqWMAcAj6cqLRdKx2/x3bYtcF1K6aGy5Ynm1inVRl76YfmdiAjyRditwMyK9+Zt8jFRy2iBN9DyNW1tqtbCqlKpe0u1YuhzK9Zpy0Jyi5S/kYtf70NuxfRR8nFarSvRz2n+XjuziKGy5VsLEbFtsf6D5BZ27Upl3YojYmjRkm8o+TO+YRd9Buay+Ov/GXIy6cSI+E7Z/oeRW8pdB8ytOBamkRMgpWNhDvl12Tp6xihvnmN0zTlGT/0cl+wDfAz4HDlZP4SWLa9K998EdkgpTUopXUxuZfYacHrxPSupDuwuJ6k7Vatv8jK5a0Clp9LiBRwhN8EHOLmYqlkNIKU0PSImAMcD/4mIB4C/A79JKd27hLGtXdw+WmXd0ry2apeUlv2ryrLH2nhcpSvIF0gHA38p5h1Mbr1QXrD2XGBPciLmexFxW7H+FSmlWR3YXzUvpdpGg+lSKaWHyb+ylkaO+Qj5ovFDwO8jYkyRZCuduN/fzibXJndHq0waQX5P9yrWKX+9qnUzLB2bbb0mq7UVSMp1I/5Fc5JpR3LrsIcj12rakdztccci5sltba9GHflMtqbFsRARvwb+DVwYEeukXGuo9Bpd1Ma2Sp/fWyIXhT0UaIqIe8mv7a9TrnfTFe5J7Rf+7lLF63AucG7kgttjyN06jgJ+GBHPp5SuICczVqT5892atr6THicfJ9W+kyqP4eHk93wXFj/Wyy1qZf7/pFxctysL7M4ubpeusmyZinXacgk5Gbdxau6K+ruImEruhngIuTbM/6SU/k0+hgGuLhKmf4mIVLxHLRRdra4nt1r5RKqx62WRpDiN/H1dLWGxAp0vCr+wynf2pIj4M3BSRFxdfLY2ICfFP19M1TwNkFJ6JyK+Sm4F90xEPEZOjF2bUvp7J+NdEp5jZJ06x+jBn2MAUkq3lt29OCKuAG6PiI1Sc9e90ue8/Ic3UkqvRsR15HOmDaj+v19SFzPJJKk7VZ7QtaXaCUdpCJkf0frFV+kEg5TSCRFxEbkWwYfIyYivR8T3U0rf6ERsDZVSejki/gTsFc0j4nyW3Orh3or1tiQ/953JdTN+DJwcER8vWgb1WinXh/llRPyK3FVwO/IQx7d1867bOjYPpvWT81qKSN8EfKloTbAjzcW7bwLOKGpl7Ag8kNqvg1GLbjnuU0pvRMSd5Ivm9ckXOKXX6OvAA608tFSImZTSIUX9nN3Ix/DXgPER8dWU0rndEXc9FcmO24DbIuJm4K/ki/qqCYwu3nflMVx6b/5Grj20RIrE2bAaV19YQ7K7dDxUG268NG9mOzGNJLe2OLcswVTyG3KS6SNUJJkqpZRuiFwo/QiqvEcR8X5yq4rXyS0n2oyr7HFBfu83JCdrphTbWEhurXkQ3Tswzw3kFojbs/jn9DJa1hUq+d/rmFL6WeSBCT5Bfh33A74SEb9OKR3QyuO7i+cYXaAnfo7bcSm55ec+NNcBfK64bTEg
CM3/o1esskxSNzDJJKknK/2qXO0X2apSHh3qJ8BPImIZ8gn1cRHxo5TSix3cfylJsDH5F8tyG1Ws09bj31tl2UZV5rXlUnIrm09FxBPAusA3K1cqfqmdXExEHpXuPuAEOlAItCdLKaWIuJucZCqdsJZaamxO2wXOnyZfwG1I2ehChdJ78kwNYZSOzc628LqJfBH7CXLCrNSd4+/kLgF70FywuD2p/VW6VakLUqnbQuk1ersDn99HyF3LfhC5EPbdwHcj4ryiS1ijn2NXKRW/LR2/L5EvZt/XzuNKx+bGVZa9l3xs15LcnEXuQrJ8J4/fT1PRBakN08m1etryMLmLzTZVln2guG2vNVrpNR1YZdmgitv2LAOsVDmzSDD9jeauOW0WSK+wGfl9PiWldGLFdr9QZf2uPuYrP6dTi30M7sDn9D/kJN2FkUc0/BVwYPF/9t5uiLm7eI6R9cTPcVuGFLfln817yMXWR1RZvzSvo++PpCVk31RJPdn95AvOL0eVIbUjD0W/UvH3sMo6G0XXhVLT6CX5BetGck2GoyLif/3/i7+PIhfsvLGNx99H/nXtsCgb1aqot1HTMPdlridfiB5cTIvIvzz/T7QcOQtyM/o5lJ2MFa/Ve1tZv8eIiJ2r1ZYqfnUt1ckodQm4mlwk/MRq9UyK1gMA1xa3x5fNIyI2ISd0bquxa+FV5JPok4t4Kvc3rGiF1J6byRdkJ5Av/m6C/yVbXiQXrQ5qq8dUGr2pxUVxd4tcx2pbcr2N0ntyA/k5fLP0Oa14zJDS5yrysOiLnZMUtbWeIdfuKHWxaNhz7KjIw6Kv18rivYrbx+B/NZeuADaKiBZdlkrHanERewewe3HMli8/vrj7u/ZiK/Y3CdgqIvZrJf5a6s50aS2XlIc4/wOwfUT8L+EWEcuSW438m7IRqVr5LnuC3IpkryJRWe7Q4vZ/LUAjYvVqsUQeln4YzQnB0vwtyN/7b5ETTLUkpcuVWrhE+czi/axWT+utYnmnj/niONmzuHsf5Baw5Bpp+0TEB6o9pvh8l+pHLVZLp/hho5SwL8XYWz6nnmNkPe5zHBHvKtZfTJHUPLK4W/7ZvJac9P1M+eMiYg3y9+2TKaWp7cUuqWvYkklSj1W0WPks+QL7oaKZ+qPki871yE2ljyfX39gBmBgRvyVfZLxFrn/yBeDulNITS7D/1yLiOPLwwndHxCXFokOL/X+pvLBnlccvjIijyQmJeyLiAvJIPZ8j12YY2YFY5keuQ/CV4nn9rUr3jAsiYgS5K8Z08q99nyb/Yl1eu2lv8q+WJ5OTGF0ics2k0jDbpZYWuxcxAfyqg7/4/xhYOXI9hYfJ3R3WIncneQ/wy6JmEyml5yLXCjkPeDhyfZ/p5FYNe5Jf8wdSSjdGxFXkpvYrRsQfgdXJJ61zycNWt6vY3+HkX/Mfj9yFbzq51s2m5JPajciFc9vazisR8SC5Bda0igvWm8nv33xy98D2PEY+yT4iImaTW6q8mFLqioLh5daMiM8Ufw8kH8efJ9eRGV905ySl9HZEHEw++X+i+PxOLdZ7L/nzuze51d3BwNER8btinfnkrjgfA64q6/Z0LznBOj4iViRfoD2TUrq7K55Y8X0zqrg7HBgcEScU96enlH7Vgc29D/h15Ppak8kXg+8Ctgb2J79Xp5StfwK5a+SFEbELuWtdAFuQz9dKn63/B9wC/CMiziN3D/kk+bW6vAO1ccaTWwNeVXwm7iInakeR60bdR3NSpqpuqOUC+Tt9J+CvEfFjcm2iL5I/y58oWrSVtPguKz5TZ5G7W95ffO++Qn6uTeRRu8q7yv0pIl4mD7c+g5xY+iD5e+M5yr4ji++4G8kJhXOAbSMX/i73u1RW2LuKx8n/x44rEjZPkL/PvkT+nhtTsf5d5O/9n0bE9eTPxt01JLcGlX1OIdd+2of8OvyVxVvOHE4+3m4tvjvvJ/8QvQ75dfgl+XV4D3BL8Tl9hNz6bsPi8c9QfE8VXben
AgdExFPAf8ktGru70H6HeI7xv+30uM8xudv1LRFxNfn1fqV47IHk2kqXppT+93+xqL10LLmA/13FezmYfGwOJiftJNVL6gFD3Dk5OfWuidqHF26xnCrDMZMvxCe3sb9R5JFSppEvgl4mXwCdAaxVrLN2sc7j5JOZt4u/T2HxYYQ7FFsxf29y64G3i+kOYK8q602myvC+5BPVB8gtX54FTiX/IljT8MJl2xlD8zC+Ta3s5zryhdE8cpeYW4B9W3n/Tqpxvwn4Yw3rbV8WX7Vp+w4eZ7uQT74fJLfiWlC89zeTT6IHtPKYUp2UueTuBBcAK5etMwj4RnF8zCOfvF4LbFqxrdHtvU7kC7bfkVvsvEOuRXEz+QJ3mRqf54+K/fyiYv4Xi/m31/oZIycH/knz0O2Tl/S4b+NYqJzeII9SdkArj9mE3OpuZvEa/Zf8Gfo2sFKxzubkLqFTyZ+xN4r3/WvA0hXbO4ScUHun2P8l7cR8UrHe2Bqe3+Q2jt9Wv6Na2daqwDHAn8nfXXOK9+Xf5Auh9ao8ZgXy6IJTaf6u+wewf8V67yuO2VeKY/hx4DhgYEfeW/LF9LfJyY055MTX4+TPzNYdeb5dOZETF78nJ0pnkxMgH62y3qFU+YySk3NfJHe3fIucmJlG/j6pHPr8cHLXt+eL1/xtcsucMyj73qj4HLU1ja7h+Y0i14eaVTy/e8j/Z06q3AY52fND8vf6Qmr4v9HKcTyneJ+/RcVnqnjMKsAPyN2N5xav/cPkulEbFeusTE7+P1Asn1Mcq2cBa1RsbyvyUPJvF/uf1oH3v/S+Vn2eeI5ROb9LzjF60ue4OB7PJf8feIX8GX6J/P+9CYhW9rkPOTH7Nvn77K/Ado16DZyc+usUKSUkSZIkSZKkzrAmkyRJkiRJkjrNmkySpLqJiMHUVgx2VsoFZaUepSgEPLid1eakNmqpSJIk9VUmmSRJ9bQtuWZRe9amnaLZUoNcQy5K3pZLaadotiRJUl9kTSZJUt0UI4JVjp5UzW0pDw8t9SgRMYb2hyt/PqX0WD3ikSRJ6klMMkmSJEmSJKnT+mx3uVVWWSWNHj260WFIkiRJkiT1Gffdd99LKaXh1Zb12STT6NGjmTJlSqPDkCRJkiRJ6jMiYnprywbUMxBJkiRJkiT1TX22JZMkqX958UW45BJ46CF4/XUYNgw22wwOOwyGV23MK0mSJKkrmWSSJPVq994LZ5wBf/5zvj+3bEy6a66BE0+E3XaD44+HLbdsTIySJElSf2B3OUlSr3X++bD99nDttTm5VJ5gApgzJ8+79tq83vnn1z9GSZIkqb+wJZMkqVc6/3w49liYPbv9dVPK6x17bL5/+OHdG5skSZLUH9mSSZLU69x7b+0JpnKlRJODj0qSJEldzySTJKnXOeOM3BVuScyZkx8vSZIkqWuZZJIk9SovvpiLfKe0ZI9PCf70J5g1K9+fNAlGj4YBA/LtpEldFakkSZLUv5hkkiR1WEcTM0uSyGntMZdc0onACxF5O5MmwbhxMH16Tj5Nn57v1/P5dOU+JEmSpEay8LckqUNKiZlSPaRSYgagqanz67f3mIceajmKXEfNmQPXXQf/+lfLuk6zZ8Mxx8Cqq8KgQTBw4OK3N9wAp57aHENnn09XvWalx40fDzNmwMiRMGFC2+sv6WMkSZKkaiItaX+DHm7s2LFpipVdJaldtSYZFizIXdXGjIEXXmi5fPnl4ZBDcgKnfLrpJpg3r+X6gwfDVlvBUkvlv5daqnm6/np4++3qj1lqqerLeoIVV4Rll205/f3v1YuUDxsGxx+fn9OgQc3Tt74FL7/ccv3VVstd/YYMgWWWybelv6++evHEFMDQoTBxYu3Jr1oeU3qciSlJkqT+KSLuSymNrbrMJJMk9R0dvfivlmRYain4xCdg5ZXhP/9pnl58ERYtanv/w4blJEUp+TFkSNsjue2wA8yf3zy9806+feKJ1h8zalRu2dNZe+yRR6n7z39a
Llt9dfjNb2DhwpxcK7/dc8/W60EddRS8+Sa89dbi0wMPdD7eJbXUUrD55jlBt/TSeSr93Voyb7XV4NZbYY01cpIsonmZiSlJkqT+zSSTJPUD1S7+l1kGjj0WNtlk8YRRaXr88dYTR6uvnpMMldNJJzUXzS43ahRMm9Zy/ujR1ZNCra3f3mOOOAJOPLFzXeaGDIGTT4Z3v7vjCZOufD4jR+b3YMGCPM2fn2+33hpmzmy5/qqr5tjmzMnPv9RabO7cnMBpza675tZk77yz+G1bybySoUOb3/vVV8/dBd98s+V6q68Of/vb4gnGIUNyQuvyy01MSZIk9RVtJZmsySRJPVR7F9hvvQXPPANPP52nE09s2SVr7lw47bTm+4MHNyeP1l8fHn20+r4jqrfwgdxaqVrCYMKE6utPmNCx9dt7zM475+faGSnBoYfC8OH5fkcSGV35fE4/Pd9W+t73qq9/5pm5JVU1Eye2nvz685+rP6a15Neqq8IPf5iPgRdeaE5KPvxw9QQT5PU22aTl/AED8utd+ZvW7Nlw5JF5e+9+d57WWCO3oho0qL51qSRJktRFUkp9chozZkySpJ7isstSGjUqpYh8e9ll7a8/dGjp0jxPgwaltM02KW29dUqrrrr4sramiJQefjill15KadGixfczalT1x4wa1fXPpyPrt/eYvffO82t9DSpfj332aX//9Xw+XbV+5TEzdGjbj1uSx7R2zAwfntKVV6Z08cUp/fSnKf3oRymddlpK48d3/P1ZbbWUllqq+vLVVkvpiSdSeu21lsfzkr4GHX0vJUmS+jNgSmolF2N3OUnqZq3VsDnjjFxE+7nn4Nlnm6fnnoP77qvejW3gQNh+e1hnnZbTFlvk1huV2urGtaT1dRrt3nvz61CtmHZ7hg6FW26BsVUb+PZu9RhdbkmOmba6C95xR24l9fzzeSr9feGF7T5dll46t3wqTZMn5xZ+lVZfPb/nyy+fpyFDcmu93nr8S5IkNZI1mSSpC9VyUb5wYa6p89RTsN9+8Mor7W93ueVgrbVgxAj461+rrxPReg2l/laQ+fzzc72pjiSahg7N3cAOP7z74uoPGpmYWnVV+NGP4L//XXx64QV48MHa4h84MCeb3ngjf1YrrbwyXHNN/jyuuWbuZlr5fHrjZ0aSJKkrmGSSpC5S7WJ56aVhn33y8PVPP50TS9Om5SLO7fnzn3NSaa21cq2jkiUpLl2Krz9d/JYSTXPmtD7iG+Tk3JAhJpgaqZGJqeHD4ayzclKpfPrJT9qPOyK3hFprrRz3W2/B3/+++Od7yBC44IKuT+b2t8+zJEnqHUwySVIr2ruIW7AgJ3WeeCJPJ55YvTsO5CTRuuvmqdSFbd114eCDc/efSn2xG1sjTJmSux7+6U85ITBnTvOyIUNy8unjH4fjj++bXeT6su5OTLWWlHr3u+HSS/N+S91YS38/8UTrCc1VV4VVVll8WnnlvI+rr86j+pW0l5jyO0CSJPVUJpkkqYpqF3GDB8Muu+TuNE88kVsl1dIiyW5sjTdrFlxySR4B7dVXc8uyTTddfBQ59X0d+dwsyWezNFpeNePGwcsvw0svLT5V65JXstpqiyekSn+ff34+jivZmlGSJDWaSSZJKrNoETz+OHz4w63XStpoI9hgA3jPe/Jt6e+xY+3GJvUlHf1sdrQra0o5ad3a6dYXv9icmCpPULWVmFpnndxqqnJ68kn4xS9g3rzmdU1oS5KkrmaSSVK/Ue1iaZ998mhkt90Gt98Od95ZvYVASXe0SpLUN3Rlrai2ElOjRuXueZWWWw722ANefDEXPH/xxdyKr62k1KBBsOWWuYXU8OHNraWGD4dHHoHzzoO5c2t/PialJEnq30wySeqVuqIey4AB+baUNNpwQ9huuzyNH9/xWklLEpekvqUeRcw78phFi3LifPjw1ltM7bRTcyupWbMWrw9VzVJLwQ475G2WT489lrvydbS1lCRJ6jtMMknqddq6wNpl
F3jmmZwIKp9uvDEX6q60/PJ5e9tsk2ue1LIPL5YkdaV6jC5Xa4uplPIABrNmwXrrtZ6Y2mqrvM6sWa0PeFAybBhcdhlssUUunB7RueciSZJ6LpNMknqdkSOrdxWJaHlBtNJK+eLqn/+svq32ur954SOpL+jOrnxz5uRk0+jRrSelSoYPz8mm978/3z77LHznOw5+IElSX2GSSVLDtXWxMHs2PPAA3HdfHo5+ypTcJaM1Z5+dL3RGj84XQsOG5fkdrXsiSX1Nd3fla+17dq214PLL4f77m6dHHqneurRk+HC49to8wt5qq8Gyyy55XJIkqX5MMklqqGoXC0stlbuvvfpqTiiVitautlouUPuPf8Drr7fcVlsJIy9KJKnjOpKY6sj37Lx58OijMGZMbXEMHdqccHrwwdx6qtLIkdWTXEvyXCRJ0pIxySSpIVLKCaEtt8xDc1caMAA+9jEYOzZPY8Y01/JY0oSRFxiS1L26qlbUGmvARRflUfIqp7/9rfXtjRkD73kPbLBB8+3668N11/lDgyRJ9WCSSVKXau0CY86c3OXtjjvgzjvz9N//tr6dtmoltbUfSVLv0ZW1opZbLreCfeKJ/L+h/DR24MDmVrHlbP0kSVLXMskkqctUu1gYNCjX43j22eb6G+utly8Ett0WTjkF/vOfltuyVpIk9Q/dUStqzhx46qmccHrySfjWt1rf3sYb5xZPpem97823119v6ydJkjrKJJOkVtVy4p8SzJyZWykdckj1WklLLw1HH50TSx/4AKy66uL78CRektQRXdUtb/nlYfvtczLqqacWL0Y+YED1FrW2fpIkqXUmmSRV1Vry54wz8knzffc1Ty++2Pa27PomSWqkWn7QmD8fnn46J5yeeAKOO6717W27bW4BVT6tvnoeRc8fTiRJ/ZlJJklVtfarb8mAAbDRRrnIamk64IDcLa6SXd8kSY3WVa2fll0W3v/+PDpe+cAVK66Yk0vz5rV8jP8HJUn9RVtJpgH1DkZSYy1cCPfem1srtZVguuMOePNNePhhuOQSOOqo/KvuGWfkX2zLDR2aT+QlSWqkpqac6Fm0KN+217JowoTq/9N+9jO45RaYNQteeAH+/nc45xzYf//qCSbI/1O/8x347W9zt7zy1r2TJuWE1oAB+XbSpCV/jpIk9WS2ZJL6kGq/4B50EEydmoeD/tvf4Kab4LXX8vpLLZW7DlRq79dYu75JkvqKrmr9NGhQTiyVkkvLLgubbQZDhsA//gHvvNO8bi3d6/xfK0nqqewuJ/UD1WpRDBwIw4bBK6/k+yNHws47w0c/CjvuCDfeaF0JSZI6oq3aT/vsk7vYPfggPPBAvr3ttjyARqV3vSuPiLfeerDuunlaYYX29+H/Z0lSo5lkkvq4N9/MJ6ezZrVcNmQInHlmTiytu24u0F3OX0olSeqYjvzvHDCgepKpmpVXzv+rH3lk8QRTiXWfJEk9gUkmqZdq7SR23jy4885cI+Lvf4d77sm1lqppb9Q3SZLUfVrrXjdqVE4mPf10ruH01FO5e/tTT+Xu7a057TTYfHN43/tgzTUX//HIH44kSfVgkknqhao1lV9qKdhgg3wCOmdO/nV0yy1hp53gwgvhxRdbbsdfPSVJapwl6frWVt2nBQua76+8ck44bb45vP12Hqhj7tza9yNJ0pJwdDmpF/rmN1s2lZ8/H/71L/jCF+Daa/OwynfdlX+pPPNMR32TJKmnaWrKiZ5Ro3Kro1Gj2k/8tDbq3SWXwOuv5zpP554Le++d7597bh4RrzzBBPk84utfX7zoeCVHvpMkdSVbMkk9yBNPwHXXwe9/D7ffXn2dtrq/2UxekqS+oSP/0xcsgMGDW6/9tPTSeaS7sWNhzJh8u9FGcNVVFhiXJHWc3eWkHqDayeIBB+TaSr//fU4uPflkXnfzzeGZZ/Kvk5Xs/iZJkiq11sVulVXgsMNgyhS47z544408f5ll8o9W1Vo5ea4hSWqL3eWkBivV
Y5g+Pf/KOH06HHIIDBsGH/oQnH12Pjk899y87P774bzz7P4mSZJq01oXu7POgu9/H266CV59Nf+gdfnlcMQRrXejmz4dLrssd9Gv1nraLnaSpNbYkkmqgxEjYObMlvOHDoWLL4Zdd4Xll2+53O5vkiSpVh09b2it9VNEc9e75ZaD978/d7EbOxaefx6+/W272ElSf9ZjustFxK7A2cBA4MKU0ncrlo8ELgVWKNb5ZkrpT8Wy44HPAwuB/0sp3dDWvkwyqZHmz881la6/Pk+PP159vbbqK0mSJHWn1ka++9nPYIstche70vTAAzBvXuvbsoudJPUfPaK7XEQMBM4DdgM2Ag6MiI0qVjsBuCqltAVwAPDT4rEbFfc3BnYFflpsT2qIas3E//vfPOrL/vvD8OGwww65G9yaa8KKK1bfzsiRdQxakiSpTGsj3332s7DJJnDoobkr/113wZtv5u78rZk+PW/vJz+Be+9t2RXPLnaS1D8MquO+tgKmppSeBoiIK4E9gcfK1klAqdPQMOD54u89gStTSvOAZyJiarG9O+sRuFSu8le/6dPh4IObWyStsQbstx984hPw0Y/mZuat/VJofSVJktRITU21dXNbaqk8MMmoUdW72A0ZApMn53pPkAuLjxkDH/hAbuF9wQUwZ05eNn16Pi8q7V+S1HfUs/D3msCzZfefK+aVOwn4TEQ8B/wJOKoDjyUixkXElIiYMmvWrK6KW/qflODYYxdPFkFOMA0bBv/8Z669dOGFsPfeOcEErf9S6ImVJEnqTVorMH7BBfkcaMYMuOqqXFg8pdwS6pxzmhNMJbNnw7e+Vb+4JUn10dNGlzsQuCSlNAL4OPCriKg5xpTSxJTS2JTS2OHDh3dbkOp/HnkETjgB1l8fXnih+jpvvJHrF0RUX97UlGsVLFqUb00wSZKk3qa9H87WWgs+9Sn40Y9yfco33mj93GjGjLzuuefCww+3rFNpFztJ6n3q2V1uJrBW2f0RxbxynyfXXCKldGdELAOsUuNjpSXS2kgsTz8NV14JV1yRk0wDBsBOO8Frr8HLL7fcjvWVJElSf1BrFzuAwYPzOVK1LnbvehfcfTdcfXW+v/LK8KEPwUc+Am+/Daefvnh5ArvYSVLPV8+WTPcC60fE2hExmFzI+7qKdWYAOwFExIbAMsCsYr0DImLpiFgbWB+4p26Rq88q1UqaPj036Z4+HQ47DNZdN0/jx+ducOeem4fs/etfczHvas3Era8kSZLUUmtd7H7+83zu9fTTcPHFsPvu8OCDcPTRuQV5ZXmC2bPzuZkkqeeqW5IppbQA+ApwA/A4eRS5RyPilIjYo1jta8AXI+JB4Arg0JQ9ClxFLhL+F+DIlNLCesWuvmv8+JYnMPPnw7PPwve/n7u13XYbHHkkrLZaXm59JUmSpNq1de4UAWuvnUeyu/jinHCaMaP1bU2fnrvYff/7cNNN8PrrzcvsXidJjRcppUbH0C3Gjh2bpkyZ0ugw1ENNmwa/+x0cc0z15REt6wJIkiSpPkaPrt7FbujQ/MPfM880z3vPe3JXuylT8o+F5ev6Q6Akdb2IuC+lNLbasp5W+FvqlLZ+wfrXv3Lf/jFj8i9mxxyTh+OtxvpKkiRJjdNaF7uJE3Nrp5deghtuyOtttBHcc8/iCSbIrdX/3/+DqVNzWQRJUvczyaQ+o1p9pS98AfbeGzbeGDbcMHePGzw4N7GeOjU3y7a+kiRJUs/SXnmClVeGXXaBb30rt05vrQX6yy/n0YFXXTXXfDr9dLj5ZnjrrbzcLnaS1LXsLqc+o7Vm1QA77AD77JMTTmuuufiy1kaXkyRJUu/Q2nngGmvAySfDnXfm6V//yvMHDIARI/LALgsWNK9vFztJal9b3eVMMqlPeOstWG656susryRJktS3lVq0lw/oUi1h9MorcPfdOeH0gx/A3Lktt7XWWm0XH5ek/s6aTOqz7r8fDj8c3v3u1texvpIkSVLfVuvovyutBLvt
BqecAvPmVd/Ws8/CQQfBX/4CCx3PWpI6xCSTeqzW+si/9RZceCFstRW8//1wySW5G9yJJ1pfSZIkqb9qasojCC9alG/b6/LW2g+Ryy6bE0y77ZZbNX396/DII83LreMkSa2zu5x6pGpNnpdZBrbbLo8e8uabuZj3l74En/kMrLhi8+OsryRJkqT2tNXFbr/94Prr4dJL4U9/ynWb3v/+PJLdb38Lc+a0fIznnJL6C2syqddpq4j3wQfn5NI22+Tm0JIkSdKSqOUHyhdfhCuvzAmnf/6z+nZGjcqtpySpPzDJpF5nwACodmhaxFuSJEmN0to5KsCjj8KGG/ojqKS+z8Lf6jVeegm+853Wl1vEW5IkSY3S1rnoxhvDOuvAUUflmk7lI9dZx0lSfzGo0QFIAM8/Dz/6Efz85/D22zB2bC6wWP7P2SLekiRJaqQJE6rXcTrjDFh66VzH6Re/gHPPzfM/+lFYZRW44ormOk7Tp+dtgHWcJPU9tmRSQz3zDBx+OKy9Npx9dh4l7pFH4N578why7Q1DK0mSJNVLU1M+J608R/2//8s1Q6+7Dl5+ORcLP+wwePBBuOiixQuFQ05SjR/fmOcgSd3Jmkyqm/LCiquvnpsT33UXDByY/wkfd1yeJ0mSJPUFKeVz3dYuuT7zGdh22zxtsklet8RRkyX1VG3VZLK7nOqicojY//wnT7vtBhdcAGuu2dj4JEmSpK4WkRNE1UZNHjIE/vY3uOyyfH/ZZeEDH8gJp7lz4Sc/sYudpN7HlkyqizXWgBdeaDnf4V4lSZLUl1X+2Aq5XtPEiXDQQflc+I47mqeHHmp9NGXPnSX1BF0yulxEbBoR50bEnyNijWLeXhGxRVcFqr7ngQdgr72qJ5ggN/+VJEmS+qrW6jg1NeX7a6+d/z7vPLj/fnjttTy/munT83JJ6qlqSjJFxC7AvcCawI7AkGLRusCJ3ROaerOHH4Z994UttoDJk2HYsOrrtTUMrCRJktQXNDXlFkiLFuXbtrq8Lbdc2+fIq68On/50HsluwYKujlSSOqfWlkynAseklPYG3imbPxnYqquDUu/16KOw//6w2Wa5j/mJJ+Z/pOedl5sFlxs6NBcwlCRJktRswoTq586nnpq73v397/DJT8KIEfC1r+UudpC75o0eDQMG5NtJk+oduaT+rtYk0ybAn6rMfwVYqevCUW9R+Q/sBz+AAw+ETTeFv/wFTjgBnnkGTjoJVlih7WbCkiRJkpq1du58wglwzjnw/PPwu9/BNtvkAuHve19e57DDcpe6lJqLhZtoklRPNRX+johngQNSSrdHxJvA+1JKT0fEvsD3UkrrdXegHWXh7+5TrXghwODB+ZeUr30NVl65MbFJkiRJ/clLL8EVV8Cxx8I777RcbrFwSV2trcLftSaZvgd8CNgfeAwYC6wBXAJcnFI6pcui7SImmbrP6NHVh2EdMQKefbbu4UiSJEn93oABuQVTpQhYuLD1YuKS1FFdMbrcCcAzwHRgWXKi6SbgNsCqOv3I/PnVE0wAM2fWNxZJkiRJWWvFwlOCMWPg8sstFC6p+7WbZIqIAcD6wJeK2/2Bg4D3ppQ+m1Ja2L0hqqeYPBk237z15Y4UJ0mSJDVGa8XCv/AFmDMn13labz04+2x4663GxCip76ulJVMCHgBWTyk9nVK6OqV0VUrp390bmnqK55+Hgw6CHXbIdZiOOcaR4iRJkqSepLVi4RdckEeAvu66/KPwV7+ab8ePh5/+1NHoJHWtWmsyPQyMSynd2f0hdQ1rMnXe/Pl59IqTTsp/f+Mb8M1vwpAh+R/Q+PEwY0b+JzVhgiPFSZIkST3dXXflkaGvuablsqFDHQFaUvu6ovD3buS6TEcCD6ZaHtRgJpk6Z/JkOPJIeOwx+MQncrPadddtdFSSJEmSusKaa+YeC5VGjmy9BqskQdcU/r4K2Aq4D5gbEW+UT10VqBpj0qTmZrIjRsC22zZ3jbvuOvjjH00wSZIkSX3J
f/5Tff6MGbml08sv1zceSX1DrS2ZDmlreUrp0i6LqIvYkqk2kybBuHE5oVRun33gssty1zhJkiRJfcvo0dVbLC29NMybB8ssk+uyHnkkvP/9dQ9PUg/W6ZZMKaVL25q6NlzV0/jxLRNMAPfdZ4JJkiRJ6qtaG43uF7+Ahx+GQw+FK6+EMWNyT4fLL4d33lm8F4TFwiVVqqklE0BELA00ARuRR5x7FLgipTSv+8JbcrZkat+iRTBwYPVlEXm5JEmSpL6pvcF8XnsNLr0UzjsP/v1vWH55mDMnDwpUYrFwqf/pisLfGwF/AZYHHi5mbwq8DuyaUnq8i2LtMiaZ2vbss3DIIXDzzdWXjxoF06bVNSRJkiRJPdCiRfC3v8Fee+UkUyWvHaT+pSsKf58N3A+MTCl9KKX0IWAk8CBwVpdEqbq54grYdFO49174wheqN5OdMKExsUmSJEnqWQYMgF12gblzqy+fPh1mzqxvTJJ6plqTTNsB30op/W8kueLv8cAHuyMwdb3XXsvF+w46CDbaCB54AC64IDdvHTUqd5EbNcrmrpIkSZJaGjmy7WV77JFHp16woH4xSepZak0yzQVWqDJ/WLFMPdzNN8Nmm8FVV8Epp8Ctt8K66+ZlTU25eeuiRfnWBJMkSZKkSq0VCz/zTPjGN3JPiT33zD9cn3ACPPNMXsdi4VL/UWuS6Q/ABRGxXUQMLKYPAj8Hruu+8NRZ8+bB178OO+2UhyG94w749rdh0KBGRyZJkiSpN2lqqt4L4uij4fTTcwHx3/0ONt8czjgj/6i96abwuc/lLnUp5dtx40w0SX1VrYW/VwAuBXYHFhazB5ATTIemlF7vrgCXVH8t/F0+QsTqq+dk0rPPwpe/DD/8IbzrXY2OUJIkSVJf9+yzcNFFcOqpsHBhy+UWC5d6r06PLle2ofWADYu7j6eUpnZBfN2iPyaZJk3KvwrMnr34/K99LSeYJEmSJKmeBgzILZgqReTaTQNq7Vsjqcfo9OhyETE4IpZJKU1NKf2hmKZGxDIRMbhrw9WSGj++ZYIJ4Oqr6x+LJEmSJLVWLDwl2GCDXM/p1VfrG5Ok7lNr3vg3wBFV5n8ZuKrrwtGSmj8/92+uZsaM+sYiSZIkSdB6sfAjjoDVVsu9LtZcE774xTz6dYnFwqXeqdYk03bAX6vMvxHYtuvC0ZKYMgW23LL15W0NNSpJkiRJ3aW1YuHnnQe33Qb335/XmTQJttgCttsOjjwylwGxWLjU+9SaZBoKLKgyfxGwXNeFo454++2c+d96a3jxRfjqV6v/SjBhQkPCkyRJkiSamnKR70WL8m1TU/OyzTeHCy6AmTNz17kXX4Sf/rRlGZDZs3N5EEk9W61JpoeAA6vMPwh4pOvCUa1uuAE22SR/EX/xi/DYY/DjH1f/laD8S1ySJEmSepoVV4Sjj4YnnsjXMtVYBkTq+QbVuN4pwO+L0eVuKubtBHwK2Ls7AlN1L72Uv3wvuywXyrv1VvjQh5qXNzWZVJIkSZLUOw0YkMt9VKs3O3Bg7mb32c/C8svXPzZJ7aupJVNK6U/A7sAo4JxiGgnskVL6Y/eF17+VF7sbNQoOPxze+1749a/h29/OhfHKE0ySJEmS1NtVKxY+eDCstRZ85Su5UPiRR+beHJJ6llq7y5FS+ktK6YMppXcV0wdTSn/uzuD6s0mTFi92N2MG/OxnsMIK8M9/wimnwDLLNDpKSZIkSepa1YqFX3QRPP003H037LMP/OIXsPHGsMMOcPXVebRtR6STGi9SSh17QMQywP7Au4AbU0pTuyOwzho7dmyaMmVKo8NYYqNHV28i2lrTUUmSJEnqL156KSeazj8/Xx+tsEIeGGn+/OZ1hg61Rq3UHSLivpTS2KrL2koyRcQpwNCU0rHF/UHAPcDmxSpvAzunlO7q0oi7QG9PMg0YkFswVYrIozJIkiRJUn+3cCH86U+w//4wd27L5aNG5RHtJHWdtpJM7XWX2xO4s+z+gcB7
gQ8CqwC3AN/qiiC1uJEjOzZfkiRJkvqbgQNh991h3rzqyx2RTqqv9pJMo4BHyu7vAvw2pXRHSukV4DRgTK07i4hdI+KJiJgaEd+ssvzHEfFAMT0ZEa+VLVtYtuy6WvfZW1Urdjd0aJ4vSZIkSWrW2o/xKcEXv2hrJqle2ksyDQTKerWyNXBH2f3ngZVq2VFEDATOA3YDNgIOjIiNytdJKR2dUto8pbQ58BPgmrLFc0rLUkp71LLP3qxasTv7E0uSJElSS9V+pB8yBHbZBX71K1h//eaBlSR1n/aSTP8GdgSIiLWBdcld5EpGAC/VuK+tgKkppadTSu8AV5K747XmQOCKGrfdJzU15Yz7okX51gSTJEmSJLVU7Uf6Cy6AG26Ap56CL30JLr00J5u+/OXmbnSOSCd1rfYKf38BOBv4LTlJ9HJKabuy5ScAW9XSsigi9gN2TSl9obj/WWDrlNJXqqw7CrgLGJFSWljMWwA8ACwAvptSurat/fX2wt+SJEmSpK7z3HNwxhlw4YW5G92HPwx33AFz5jSv44h0UvuWuPB3SulC4ChgOeBmYN+KVd4NXNQVQVY4ALi6lGAqjCqexEHAWRGxbuWDImJcREyJiCmzZs3qhrAkSZIkSb3RiBFw3nkwdSp8/vPw978vnmACmD0bxo9vTHxSX9BmS6Yu3VHENsBJKaWPFfePB0gpnVFl3fuBI1NKd1QuK5ZfAvwxpXR1a/uzJZMkSZIkqTUDBuQWTZUicskSSdUtcUumLnYvsH5ErB0Rg8mtlVqMEhcR7wVWBO4sm7diRCxd/L0KsB3wWF2iliRJkiT1Oa2NSLf88vDWW/WNReor6pZkSiktAL4C3AA8DlyVUno0Ik6JiPKaTgcAV6bFm1htCEyJiAfJ3fa+m1IyySRJkiRJWiLVRqQbOBBefx3WWw/OPRfeeacxsUm9Vd26y9Wb3eUkSZIkSW2ZNCnXYJoxI7dsmjAB1lkHjj8ebrkF1l4bTj4ZDjooJ6Ak9ZzucpIkSZIk9RhNTTBtWq7BNG1avr/NNnDzzfCXv8CKK8LBB8Pmm8N11+Wk1OjRuZ7T6NH5vqRmJpkkSZIkSSoTAR/7GNx7L1x1Ve42t+eeOeE0fXouGD59OowbZ6JJKtepJFNErBURF3VVMJIkSZIk9RQDBsCnPgWPPgorrdRy1LnZs3N3O0lZZ1syrQQc0hWBSJIkSZLUEw0aBK++Wn3ZjBn1jUXqyQa1tTAiDm7n8a0M+ihJkiRJUt8xcmTuIldp4ED4859ht93qH5PU07SZZAIuAWYDrQ1BZ00nSZIkSVKfN2FCrsE0e3bzvKWXzt3oPv5x2G8/OOssWHPNhoUoNVx7SaLngYNTSstVm4Dt6hCjJEmSJEkN1dQEEyfCqFG5MPioUfCLX+RR6SZMgD/+Ed773pxoWrCg0dFKjdFekuk+4P1tLE9AdF04kiRJkiT1TE1NOam0aFG+bWqCwYPhW9/KxcE//GE4+mjYcku4++5GRyvVX3tJph8Ct7exfCqwQ9eFI0mSJElS77POOrk1029/C7NmwTbbwOGH59ZPo0fnkepGj4ZJkxodqdR9IqXWyi31bmPHjk1TpkxpdBiSJEmSpH7mzTfhxBNz17nKS+6hQ3PiqampIaFJnRYR96WUxlZb1mZLpojYLCIs7i1JkiRJUo2WWw7OPBNWX73lstmzYfz4+sck1UN7CaT7gVVKdyLi+ohYo3tDkiRJkiSp93vhherzZ8yobxxSvbSXZKos6v1hYEg3xSJJkiRJUp8xcmT1+cssAy++WN9YpHqwK5wkSZIkSd1gwoRcg6ncUkvBO+/AJpvA73/fmLik7tJekikVU+U8SZIkSZLUhqamXOR71CiIyLcXXwwPPggjRsBee8HnPgdvvNHoSKWu0ebochGxCLgRmFfM2g24BZhdvl5KaY/uCnBJObqcJEmSJKmneucdOOUUOOMMWGstuPRS+MhHGh2V1L4lHl0OuBR4Hni5mC4D
ni27X5okSZIkSVKNBg+G006D227LXeh22AGOPRbmzm10ZNKSG9TWwpTSYfUKRJIkSZKk/mabbeCBB3KC6Uc/ghtugAMPzN3sZszIxcMnTMhd76Sers3ucr2Z3eUkSZIkSb3Jn/+cE0yvv774/KFDc9LJRJN6gs50l5MkSZIkSXWw226w3HIt58+eDePH1z8eqaNMMkmSJEmS1EPMnFl9/owZ9Y1DWhImmSRJkiRJ6iFGjqw+f8iQlt3opJ7GJJMkSZIkST3EhAm5BlO5pZaCOXNg883hjjsaEpZUk5qTTBExNCK2jYi9ImKf8qk7A5QkSZIkqb9oaspFvkeNgoh8e/HFcPvt+f6HPwwnnwwLFjQ6UqmlmkaXi4iPAlcAK1dZnFJKA7s6sM5ydDlJkiRJUl/yxhtw5JFw2WWw3Xb5dvToRkel/qYrRpc7G7geGJFSGlAx9bgEkyRJkiRJfc3yy8OvfgWTJsHDD8P73geXX97oqKRmtSaZRgOnppSe78ZYJEmSJElSOw46CB54ADbZJHev++xn4cILc6umAQPy7aRJDQ5S/VKtSabbgQ26MxBJkiRJklSbtdeGW26Bk07K3ebGjYPp0yGlfDtunIkm1V+tNZn2AU4DzgQeBuaXL08p/bNbousEazJJkiRJkvqD1VeH//635fxRo2DatLqHoz6urZpMg2rcxtXF7cQqyxJgXSZJkiRJkhrgxRerz58xo75xSLUmmdbu1igkSZIkSdISGTkyd5GrtHK18eGlblRTTaaU0vS2pu4OUpIkSZIkVTdhAgwduvi8AQPgpZfgc5+Dt95qTFzqf2ot/E1EbBYRv4yIKRFxb0RcGhGbdGdwkiRJkiSpbU1NMHFirsEUkW8vvhhOOAEuuQS22ALuvbfRUao/qCnJFBF7AP8E1gL+DPwFGAncHxG7d194kiRJkiSpPU1Nucj3okX59uCD4dRTYfJkmDcPtt0WvvtdWLiwwYGqT6u1JdNpwISU0g4ppW8X0w7AGcUySZIkSZLUw3z4w/Dgg7D33nD88bDzzvDcc42OSn1VrUmm9wC/qjL/V8AGXReOJEmSJEnqSiuuCL/+NVx0EdxzD2y2GVxzDUyaBKNH5/pNo0fn+1Jn1Dq63IvAGGBqxfwxwH+7NCJJkiRJktSlIuCww+CDH4SDDoJ994VBg2DBgrx8+nQYNy7/3dTUuDjVu9XakukC4OcRMT4idiimE4CfARO7LzxJkiRJktRV1l8fbr8dll++OcFUMns2jB/fmLjUN9Takuk04C3ga8CpxbzngROBc7ohLkmSJEmS1A0GD4Y336y+bMaM+saivqWmJFNKKQE/Bn4cEcsV81o5JCVJkiRJUk82cmTuIldtvrSkau0u9z8ppTdNMEmSJEmS1HtNmABDh7acv+ee9Y9FfUerLZki4iHgIymlVyPiYSC1tm5KabPuCE6SJEmSJHW9UnHv8eNzF7k114QhQ+Dcc+F974PPfa6x8al3aqu73G+BeWV/t5pkkiRJkiRJvUtT0+Ijyb39NuyzD3z+8/D663D00Y2LTb1Tq0mmlNLJZX+fVJdoJEmSJElSQ7zrXXDddTnxdMwx8NprcNJJENHoyNRb1FSTKSJuiogVqsxfPiJu6vKoJEmSJElS3S29NFx5JRx2GJxyCnz1q7BoUaOjUm9R0+hywPbA4CrzlwE+1GXRSJIkSZKkhho0CC68EIYNg7POyl3nLrwwz5fa0uYhEhHvL7u7WUS8UnZ/IPAxYGZ3BCZJkiRJkhpjwAA480xYcUU48UR44w244orc0klqTXt5yCnkgt8J+GuV5XOAo7o6KEmSJEmS1FgR8J3vwAorwP/7f/DJT8KBB+ZudDNmwMiRMGHC4sXD1b+1l2RaGwjgaWArYFbZsneAF1NKC2vdWUTsCpxNbgV1YUrpuxXLfwzsUNwdCqyaUlqhWHYIcEKx7LSU0qW17leSJEmSJC2Z//u/3HXu0EPhppuaazRNnw7jxuW/TTQJ
IFJK9dlRxEDgSWBn4DngXuDAlNJjrax/FLBFSulzEbESuVXVWHKrqvuAMSmlV1vb39ixY9OUKVO6+FlIkiRJktQ/DR8OL73Ucv6oUTBtWt3DUYNExH0ppbHVltVctisiBpFbM42kogh4SumXNWxiK2BqSunpYntXAnsCVZNMwIHAicXfHwNuTCm9Ujz2RmBX4Ipa45ckSZIkSUvu5Zerz58xo75xqOeqKckUEe8F/kBz97mFxWPnA/OAWpJMawLPlt1/Dti6lf2NKvZ1UxuPXbOW2CVJkiRJUueNHJm7yFWbLwEMqHG9s8hd1IYBs4ENyV3XHgD27Ya4DgCu7ki9J4CIGBcRUyJiyqxZs9p/gCRJkiRJqsmECTB06OLzInJRcAlqTzJtSS62/TawCBiUUvoncBzwoxq3MRNYq+z+iGJeNQeweFe4mh6bUpqYUhqbUho7fPjwGsOSJEmSJEntaWqCiRNzDaYIWH31nHT64Q/hiScaHZ16glqTTEFuwQR5hLlSV7XngPVq3Ma9wPoRsXZEDCYnkq5rsaPcNW9F4M6y2TcAu0TEihGxIrBLMU+SJEmSJNVJU1Mu8r1oEfznP3DnnbBgAXzkI/DII42OTo1Wa5LpEeB9xd/3AN+IiI8AJwNTa9lASmkB8BVycuhx4KqU0qMRcUpE7FG26gHAlals2Lui4Pep5ETVvcAppSLgkiRJkiSpMTbdFG65BQYMgO23h/vvb3REaqQoy+W0vlLEx4B3pZSuiYh1gOuBDYCXgP1TSpO7NcolMHbs2DRlypRGhyFJkiRJUp83dSrsuCO8+Sb89a+w5ZaNjkjdJSLuSymNrbasppZMKaUbUkrXFH8/nVLaEFgFWK0nJpgkSZIkSVL9rLce3HorrLgi7LQT3H57oyNSI9TaXa6FlNIrqZZmUJIkSZIkqc8bPTonmlZfHT72MZg8udERqd4GtbYgIm4GakoipZR27LKIJEmSJElSrzRiRK7R9NGPwm67we9/D7vs0uioVC9ttWR6BHi0mP4FjCGPKvdcMb27mPd4N8coSZIkSZJ6iTXWyK2YNtgAdt8djj02t3IaMCDfTprU4ADVbWot/P1jYCDw/8q7yEXEWcU2/l+3RbiELPwtSZIkSVLjvPIKjBkD06YtPn/oUJg4EZqaGhKWOqnThb+Bg4Fzq9Rg+inw2c4EJ0mSJEmS+p6VVoKFC1vOnz0bxo+vfzzqfrUmmQLYtMr8avMkSZIkSZJ47rnq82fMqG8cqo9WC39XuAi4MCLWB+4q5n0AOA64uDsCkyRJkiRJvdvIkTB9evX56ntqbcl0HHAGcBRwUzEdBXy3WCZJkiRJkrSYCRNyDaZKH/5w/WNR96upJVNKaRHwfeD7EbF8Me+N7gxMkiRJkiT1bqXi3uPH5y5ya60Fq64Kv/oVbLIJHGezlT6l1u5y/2NySZIkSZIk1aqpafGR5ObPh4MPhm98A+bOhW9/GyIaF5+6TqtJpoh4CPhISunViHgYqBxZ7n9SSpt1R3CSJEmSJKlvWWopuOwyWHppOPHEnGiaMMFEU1/QVkum3wLzir+vrkMskiRJkiSpHxg4EC66KCeazjgD5syBM8800dTbtZpkSimdXO1vSZIkSZKkzhowAH72M1hmGTjrLJg3D849N89X79ThmkySJEmSJEldISInmJZZBr7//dx17oILcksn9T5t1WRqsw5TOWsySZIkSZKkJREB3/0uDBkCJ58M//oXzJwJzz4LI0fmek3lhcPVc7XVksk6TJIkSZIkqdtFwEknwWOPwW9+0zx/+nQYNy7/baKp54uUamqs1OuMHTs2TZkypdFhSJIkSZKkGo0enRNLlUaNgmnT6h2NqomI+1JKY6sts5yWJEmSJEnqEWbM6Nh89Sw1F/6OiMOAA4GRwODyZSmldbo4LkmSJEmS1M+MHFm9JdPIkfWPRR1XU0umiPg68CPgPmA0cC3wCLAScFE3xSZJkiRJkvqRCRNg6NCW8w86
qP6xqONq7S73RWBcSul4YD5wbkppD3LiaVR3BSdJkiRJkvqPpiaYODHXYIqAtdaCd78bfv5zeOqpRken9tSaZBoB3FP8PQdYvvj7CmDfrg5KkiRJkiT1T01Nucj3okW5FtMtt0BKsPvu8MYbjY5Obak1yfQCsErx93Rgm+Lv9YC+OTydJEmSJElquPXWg6uvhiefzN3mFi5sdERqTa1JppuAPYq/fwGcGRE3A78GrumOwCRJkiRJkgB23BHOPhuuvx6+9a1GR6PWtDm6XER8NKX0N2AcRUIqpfSziHgV2A74LfDzbo9SkiRJkiT1a0ccAY88At//PmyyCXz2s42OSJUipdZ7u0XEImAaufXSxSml5+sUV6eNHTs2TZkypdFhSJIkSZKkLjJ/PuyyC9x5Z67VtPXWjY6o/4mI+1JKY6sta6+73Mbk7nBHAdMj4vqI2DsiBnZ1kJIkSZIkSW1Zain4zW/yiHN77QXPPdfoiFSuzSRTSunxlNKx5NHlPk0u8n0VMDMivhcRG9QhRkmSJEmSJABWWQX+8Ad4662caJo9u9ERqaSmwt8ppQUppWtSSp8ERgHnAPsAj0XErd0ZoCRJkiRJUrmNN4bLL4d//hM+/3looxKQ6qjW0eX+p6jL9FNyouk1cgFwSZIkSZKkutl9dzj9dLjyynyrxutQkikiPhoRlwPPAycDVwJViz1JkiRJkiR1p298A5qa4IQTYNVVYcAAGD0aJk1qdGT9U7tJpogYGREnRsQzwF+BNYBxwLtTSkemlO7v7iAlSZIkSZIqRcBOO+Xk0qxZudvc9OkwbpyJpkZoM8kUEX8Dnga+RG619J6U0g4ppctSSnPrEaAkSZIkSVJrTj4ZFi1afN7s2TB+fGPi6c8GtbP8bXKB7+tTSgvrEI8kSZIkSVLNZszo2Hx1nzaTTCmlPesViCRJkiRJUkeNHJm7yFWbr/rq8OhykiRJkiRJPcWECTB0aMv5n/tc/WPp70wySZIkSZKkXqupCSZOhFGjciHwESNg5ZXhoovg1VcbHV3/YpJJkiRJkiT1ak1NMG1aLgD+7LNw/fUwcyYcdlgecU71YZJJkiRJkiT1KVtvDd//Pvz+93D22Y2Opv8wySRJkiRJkvqcr34V9tgDjjsO7rmn0dH0DyaZJEmSJElSnxMBF18M7343fPrT1meqB5NMkiRJkiSpT1ppJfj1r+G55/Joc9Zn6l4mmSRJkiRJUp9Vqs907bVwzjmNjqZvM8kkSZIkSZL6tFJ9pq9/3fpM3ckkkyRJkiRJ6tNK9ZnWWMP6TN3JJJMkSZIkSerzrM/U/UwySZIkSZKkfuEDH4Dvfc/6TN2lrkmmiNg1Ip6IiKkR8c1W1tk/Ih6LiEcj4vKy+Qsj4oFiuq5+UUuSJEmSpL7i6KNh993hmGNy97kBA2D0aJg0qdGR9X6D6rWjiBgInAfsDDwH3BsR16WUHitbZ33geGC7lNKrEbFq2SbmpJQ2r1e8kiRJkiSp74mAT34S/vhHeOGFPG/6dBg3Lv/d1NS42Hq7erZk2gqYmlJ6OqX0DnAlsGfFOl8EzkspvQqQUnqxjvFJkiRJkqR+4PTTW9Zkmj0bxo9vTDx9RT2TTGsCz5bdf66YV+49wHsi4vaIuCsidi1btkxETCnm79XNsUqSJEmSpD5qxoyOzVdt6tZdrkaDgPWB7YERwK0RsWlK6TVgVEppZkSsA9wUEQ+nlJ4qf3BEjAPGAYwcObKugUuSJEmSpN5h5MjcRa7afC25erZkmgmsVXZ/RDGv3HPAdSml+SmlZ4AnyUknUkozi9ungcnAFpU7SClNTCmNTSmNHT58eNc/A0mSJEmS1OtNmABDhy4+b8AAOO20xsTTV9QzyXQvsH5ErB0Rg4EDgMpR4q4lt2IiIlYhd597OiJWjIily+ZvBzyGJEmSJElSBzU1wcSJMGpULgS+0kqwaBHMmtXoyHq3uiWZUkoLgK8ANwCPA1ellB6NiFMiYo9itRuA
lyPiMeBm4OsppZeBDYEpEfFgMf+75aPSSZIkSZIkdURTE0yblpNLL70Eu+8Oxx8Pjz7a6Mh6r0iV5dT7iLFjx6YpU6Y0OgxJkiRJktQL/Pe/sMkmsNZacNddMHhwoyPqmSLivpTS2GrL6tldTpIkSZIkqUdabbXche7+++HUUxsdTe9kkkmSJEmSJAnYe2845BA4/fTcmkkdY5JJkiRJkiSpcPbZMGIEfPaz8PbbjY6mdzHJJEmSJEmSVBg2DC69FJ56Cr7+9UZH07uYZJIkSZIkSSqz/fZw9NFw/vlwww2Njqb3MMkkSZIkSZJUYcIE2GgjOOwweOWVRkfTO5hkkiRJkiRJqrDMMnDZZTBrFhxxRKOj6R1MMkmSJEmSJFWxxRZw0knw61/DFVc0OpqezySTJEmSJElSK77xDfjAB3JrppkzGx1Nz2aSSZIkSZIkqRWDBsEvfwnvvAO77gqjRsGAATB6NEya1OjoehaTTJIkSZIkSW1Yf33Yf3945BGYMQNSgunTYdw4E03lTDJJkiRJkiS146abWs6bPRvGj69/LD2VSSZJkiRJkqR2PPts9fkzZtQ3jp7MJJMkSZIkSVI7Ro7s2Pz+yCSTJEmSJElSOyZMgKFDF583ZEier8wkkyRJkiRJUjuammDixDy6XESet/XWeb4yk0ySJEmSJEk1aGqCadNg0SL42tdg8mS4445GR9VzmGSSJEmSJEnqoJNOghEj4PDDYcGCRkfTM5hkkiRJkiRJ6qBll4Wzz4aHHoJzzml0ND2DSSZJkiRJkqQlsPfe8PGPw4knwnPPNTqaxjPJJEmSJEmStAQi4Cc/yd3ljj660dE0nkkmSZIkSZKkJbTOOnDCCXD11fCXvzQ6msYyySRJkiRJktQJxx4LG2wARx4Jc+Y0OprGMckkSZIkSZLUCUsvDT/9KTz9NJxxRqOjaRyTTJIkSZIkSZ20447Q1ATf+x48+WSjo2kMk0ySJEmSJEld4Ic/hCFD4IgjIKVGR1N/JpkkSZIkSZK6wOqrw4QJ8Pe/w5VXNjqa+jPJJEmSJEmS1EW+/GUYOxaOOQZef73R0dSXSSZJkiRJkqQuMnAgnH8+/Pe/8O1vNzqa+jLJJEmSJEmS1IXGjs11mc47D/75z0ZHUz8mmSRJkiRJkrrYaafB8OGw334wahQMGACjR8OkSY2OrPuYZJIkSZIkSepiK6wA++4LzzwDM2bk0eamT4dx4/puoskkkyRJkiRJUjf44x9bzps9G8aPr38s9WCSSZIkSZIkqRs8+2z1+TNm1DeOejHJJEmSJEmS1A1GjuzY/N7OJJMkSZIkSVI3mDABhg5dfN7QoXl+X2SSSZIkSZIkqRs0NcHEiXl0uYh8O3Fint8XDWp0AJIkSZIkSX1VU1PfTSpVsiWTJEmSJEmSOs0kkyRJkiRJkjrNJJMkSZIkSZI6zSSTJEmSJEmSOs0kkyRJkiRJkjrNJJMkSZIkSZI6zSSTJEmSJEmSOs0kkyRJkiRJkjotUkqNjqFbRMQsYHqj4+giqwAvNToINZTHQP/m+9+/+f7LY6B/8/2Xx0D/5vuvnngMjEopDa+2oM8mmfqSiJiSUhrb6DjUOB4D/Zvvf//m+y+Pgf7N918eA/2b77962zFgdzlJkiRJkiR1mkkmSZIkSZIkdZpJpt5hYqMDUMN5DPRvvv/9m++/PAb6N99/eQz0b77/6lXHgDWZJEmSJEmS1Gm2ZJIkSZIkSVKnmWTqQSJi14h4IiKmRsQ3qyz/cET8MyIWRMR+jYhR3aeG9/+YiHgsIh6KiL9HxKhGxKnuU8Mx8OWIeDgiHoiI2yJio0bEqe7R3vtftt6+EZEioteMMqLa1PAdcGhEzCq+Ax6IiC80Ik51j1q+AyJi/+Jc4NGIuLzeMar71PD5/3HZZ//JiHitAWGqG9VwDIyMiJsj4v7ieuDjjYhT3aOG939UcQ34UERMjogRjYizFnaX6yEiYiDwJLAz8BxwL3BgSumx
snVGA8sDxwLXpZSubkCo6gY1vv87AHenlGZHxOHA9imlTzckYHW5Go+B5VNKbxR/7wEckVLatRHxqmvV8v4X6y0HXA8MBr6SUppS71jVPWr8DjgUGJtS+kpDglS3qfH9Xx+4CtgxpfRqRKyaUnqxIQGrS9X6P6Bs/aOALVJKn6tflOpONX4HTATuTymdX/zQ+KeU0uhGxKuuVeP7/xvgjymlSyNiR+CwlNJnGxJwO2zJ1HNsBUxNKT2dUnoHuBLYs3yFlNK0lNJDwKJGBKhuVcv7f3NKaXZx9y6gx2avtURqOQbeKLv7LsBfCfqOdt//wqnA94C59QxOdVHrMaC+qZb3/4vAeSmlVwFMMPUpHf38HwhcUZfIVC+1HAOJ3OAAYBjwfB3jU/eq5f3fCLip+PvmKst7DJNMPceawLNl958r5ql/6Oj7/3ngz90akeqtpmMgIo6MiKeA7wP/V6fY1P3aff8j4v3AWiml6+sZmOqm1v8D+xZN5a+OiLXqE5rqoJb3/z3AeyLi9oi4KyJsydp31HweWJRLWJvmi031DbUcAycBn4mI54A/AUfVJzTVQS3v/4PAPsXfewPLRcTKdYitw0wySb1MRHwGGAv8oNGxqP5SSuellNYFvgGc0Oh4VB8RMQA4E/hao2NRQ/0BGJ1S2gy4Ebi0wfGovgYB6wPbk1uyXBARKzQyIDXEAcDVKaWFjQ5EdXcgcElKaQTwceBXxfmB+odjgY9ExP3AR4CZQI/8HvCg7DlmAuW/SI4o5ql/qOn9j4iPAuOBPVJK8+oUm+qjo98BVwJ7dWdAqqv23v/lgE2AyRExDfgAcJ3Fv/uUdr8DUkovl333XwiMqVNs6n61/A94jlyTc35K6Rly/Y716xSfuldHzgEOwK5yfVEtx8DnyXXZSCndCSwDrFKX6NTdajkHeD6ltE9KaQvy9SAppdfqFmEHmGTqOe4F1o+ItSNiMPkfyHUNjkn10+77HxFbAD8nJ5isw9D31HIMlF9MfAL4dx3jU/dq8/1PKb2eUlolpTS6KPJ5F/m7wMLffUct3wFrlN3dA3i8jvGpe9VyHngtuRUTEbEKufvc03WMUd2npuuAiHgvsCJwZ53jU/er5RiYAewEEBEbkpNMs+oapbpLLecAq5S1XDseuKjOMdbMJFMPkVJaAHwFuIF80nhVSunRiDilGEWKiNiy6IP7KeDnEfFo4yJWV6rl/Sd3j1sW+E0xfK1JyD6kxmPgK8Ww1Q8AxwCHNCZadbUa33/1YTUeA/9XfAc8SK7JdmhjolVXq/H9vwF4OSIeIxd9/XpK6eXGRKyu1IH/AQcAVyaHB+9zajwGvgZ8sfgfcAVwqMdC31Dj+7898EREPAmsBkxoSLA1CI9LSZIkSZIkdZYtmSRJkiRJktRpJpkkSZIkSZLUaSaZJEmSJEmS1GkmmSRJkiRJktRpJpkkSZIkSZLUaSaZJEmS2hARoyMiRcTYBux7ckSc28ltbF/Ev0ob6+wXEQ45LEmSOsUkkyRJ6reK5Etb0yWNjlGSJKm3GNToACRJkhpojbK/PwlcUDFvDrDikmw4IpZKKc3vRGySJEm9ii2ZJElSv5VSeqE0Aa9VzkspvV62+qiIuDEiZkfEYxGxc2lBWZe0j0fEPRHxDvCxyI6LiKciYk5EPBwRnymPISK+ExHTI2JeRLwQEb+sCHNARJweES9FxIsR8cOIGFD2+BUj4tKIeLXYx98iYuO2nndEHFzsc3ZE/BFYbcleQUmSpGYmmSRJkmozATgHeB9wL3BlRCxbsc73gBOA9wJ3A6cBnweOBDYCzgB+HhGfAIiIfYFjgSOA9cmtqe6p2GYTsADYFvgK8FXg02XLLwG2BvYEtgJmA3+JiCHVnkREbF08ZiKwOfAH4JTaXgJJkqTWRUrWeJQkSYqI/YDfpJSiYv5o4Bngyymlnxfz1gSeAz6UUrotIrYHbgb2Syn9tljnXcBLwC4ppX+Ube8s4D0ppY9HxDHAl4BN
qnWti4jJwNIppW3K5t0ITE8pfSEi1geeBD6SUrq1WD4MmAF8LaV0YVlsw1NKL0XE5cXf5S2xLgQ+X/ncJUmSOsKWTJIkSbV5qOzv54vbVSvWmVL290bAMuRWRW+VJuBwYN1ind8U6zwTEb+IiE9FxNJt7Le079J+NwQWAXeWFhZd/B4u9l/NhuXrFyrvS5IkdZiFvyVJkmrzv5ZGKaUUEdDyB7u3y/4uLdud3LKoxbZSSs9GxAbATsBHgR8BJ0bE1imlt8vXLZOq7Lcam6tLkqS6siWTJElS93gMmAeMSilNrZiml1ZKKc1NKV2fUjoa2BLYGNiuxn08Tj6fK+9OtzywabH/1h7zgYp5lfclSZI6zJZMkiRJ3SCl9GZE/BD4YeRmT7cCy5ITOotSShMj4lDy+djdwFvkgt7zgX/XuI9/R8TvycXEx5FHyJsAvAFc3srDzgHuiIjjgauB7YG9l+Q5SpIklbMlkyRJUvf5NnASeQS5R4EbgX3JhcQhJ4U+D/wDeKRYtk9K6ZnKDbXhMPKIdNcVt0OBXVNKc6qtnFK6q9jn4eR6T/sUMUqSJHWKo8tJkiRJkiSp02zJJEmSJEmSpE4zySRJkiRJkqROM8kkSZIkSZKkTjPJJEmSJEmSpE4zySRJkiRJkqROM8kkSZIkSZKkTjPJJEmSJEmSpE4zySRJkiRJkqROM8kkSZIkSZKkTvv/N32yDX4HhwEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1440x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# PLOT THRESHOLD VS. F1_SCORE\n",
    "plt.figure(figsize=(20,5))\n",
    "plt.plot(thresholds,scores,'-o',color='blue')\n",
    "plt.scatter([best_threshold], [best_score], color='blue', s=300, alpha=1)\n",
    "plt.xlabel('Threshold',size=14)\n",
    "plt.ylabel('Validation F1 Score',size=14)\n",
    "plt.title(f'Threshold vs. F1_Score with Best F1_Score = {best_score:.3f} at Best Threshold = {best_threshold:.3}',size=18)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9069142306919424\n"
     ]
    }
   ],
   "source": [
    "auc = roc_auc_score(y, train_pred['lgb'])\n",
    "f1 = best_score\n",
    "print((auc + f1) / 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 生成提交结果\n",
    "\n",
    "这里我们控制1的个数为4100个左右，最终线上得分0.7956687074375137，f1:0.6540415704387991, auc:0.9372958444362285。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4000"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# label=[1 if x >= 0.265+0.235 else 0 for x in prediction+0.235]\n",
    "# np.sum(label)\n",
    "\n",
    "label=[1 if x >= 0.26+0.24 else 0 for x in prediction+0.24]\n",
    "np.sum(label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "preliminary_a_submit_sample['related_prob'] = prediction+0.24"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "preliminary_a_submit_sample.to_csv('submit.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 进一步优化\n",
    "\n",
    "1. 由于foodid只有训练集有，那么是否可以使用food侧的特征，做相似度模型，例如共现矩阵、tfidf、embedding等。\n",
    "\n",
    "2. 目标编码做了之后，线下会涨很多但是线上长得比较少，还是过拟合比较严重，是否可以考虑根据疾病特征做聚类，然后减轻这种情况。同理food侧特征也可以做聚类，用来解决测试集都是训练集未曾出现过的id的问题。\n",
    "\n",
    "3. 交叉特征里面，对于food侧只取了一部分，是否可以多取一点（进一步上述base可以通过筛选food特征提升至7976的分数，但是会很抖）。\n",
    "\n",
    "4. 特征筛选的地方，并没有剔除缺失率高的特征，也没有根据对抗验证进行筛选，或许可以进行尝试。\n",
    "\n",
    "5. 模型训练的参数，学习率太低，叶子节点数太高，导致模型过拟合比较严重，可以考虑调参（可以上分）。\n",
    "\n",
    "6. 目前仅使用了lightgbm模型，可以考虑xgboost，catboost模型，进行模型的集成（xgb貌似还不错，需要祖传参数）。\n",
    "\n",
    "7. 上述base是1的个数为4100个，可以调整不同的个数来测试，亲测不同的特征组合哪怕只是添加一个，最优的1的个数都是不同的。米哥开源的7949的代码，只需调整1的个数就可以7956，可以考虑将两份base结合看是否可以得到更高的分数。\n",
    "\n",
    "8. svd的维度是可以调整的，不一定每个都是一样的。以及还可以使用pca的方法，不过需要进行归一化。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.6 64-bit",
   "name": "python396jvsc74a57bd031f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
  },
  "language_info": {
   "name": "python",
   "version": ""
  },
  "metadata": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}